Skip to content
aprod.c 15.1 KiB
Newer Older
Fabio Roberto Vitello's avatar
Fabio Roberto Vitello committed
#ifdef OMP
#include <omp.h>
#endif
#include <stdlib.h>
#include <stdio.h>
#include "mpi.h"
//#include "pardef.h"
#include "util.h"
void aprod(int mode, long int m, long int n, double *vVect, double *knownTerms,
                              double *systemMatrix,long int *matrixIndex, int *instrCol,int *instrConstrIlung, struct comData comlsqr,time_t *ompSec)
Fabio Roberto Vitello's avatar
Fabio Roberto Vitello committed
{
// Parallel definitions
    int myid,nproc;
Fabio Roberto Vitello's avatar
Fabio Roberto Vitello committed
    long int *mapNoss, *mapNcoeff;
	int nthreads, tid;
	long **mapForThread;
///    struct comData *comlsqr;
//
  	double zero=0.0;
  	double sum, yi;
  	long int l1,l2;
  	long int i, i1; 
  	long int l, j, k;
  	int i2=0,j2=0,j3=0,na=0;
Fabio Roberto Vitello's avatar
Fabio Roberto Vitello committed
    int setBound[4];
  
  	double localSum;
    short nAstroPSolved=comlsqr.nAstroPSolved;
    long localAstro= comlsqr.VrIdAstroPDim*nAstroPSolved;
  	long localAstroMax=comlsqr.VrIdAstroPDimMax*nAstroPSolved;  
//  Initialize.
myid=comlsqr.myid;
nproc=comlsqr.nproc;
mapNcoeff=comlsqr.mapNcoeff;
mapNoss=comlsqr.mapNoss;
nthreads=comlsqr.nthreads;
mapForThread=comlsqr.mapForThread;
int multMI=comlsqr.multMI;
long nparam=comlsqr.parOss;
short nAttAxes=comlsqr.nAttAxes;
int numOfExtStar=comlsqr.numOfExtStar;
int numOfBarStar=comlsqr.numOfBarStar;
int numOfExtAttCol=comlsqr.numOfExtAttCol;
long VrIdAstroPDimMax=comlsqr.VrIdAstroPDimMax;
int startingAttColExtConstr=comlsqr.startingAttColExtConstr;
int nOfElextObs=comlsqr.nOfElextObs;
int nEqExtConstr=comlsqr.nEqExtConstr;
int nOfElBarObs=comlsqr.nOfElBarObs;
int nEqBarConstr=comlsqr.nEqBarConstr;
int debugMode=comlsqr.debugMode;
short nInstrPSolved=comlsqr.nInstrPSolved;
int nOfInstrConstr=comlsqr.nOfInstrConstr;
int nElemIC=comlsqr.nElemIC;
short nAttP=comlsqr.nAttP;
short nGlobP=comlsqr.nGlobP;
    
setBound[0]=comlsqr.setBound[0];
    setBound[1]=comlsqr.setBound[1];
    setBound[2]=comlsqr.setBound[2];
    setBound[3]=comlsqr.setBound[3];
    long nDegFreedomAtt=comlsqr.nDegFreedomAtt;
    short nAttParAxis=comlsqr.nAttParAxis;
    long offsetAttParam=comlsqr.offsetAttParam;
    long offsetInstrParam=comlsqr.offsetInstrParam;
    long offsetGlobParam=comlsqr.offsetGlobParam;
    
nthreads=1; 
tid=0;
    FILE *fp1,*fp2;
//        fp1=fopen("test1_aprod","w");
//        fp2=fopen("test2_aprod","w");
if(mode!=1 && mode !=2)
{
	printf("ERROR: Invalid mode=%d in aprod function\n",mode);
	exit(1);
}
l1=0;
l2=0;
myid=comlsqr.myid;
if(mode==1)
{
    time_t startTime=time(NULL);
#pragma omp parallel private(myid, sum,  k, l1, l2, l, j,tid,nthreads,i2,na) shared(mapNoss,instrCol,comlsqr,vVect,systemMatrix,matrixIndex,knownTerms,j2)
        myid=comlsqr.myid;
        if(comlsqr.itn==1)
Fabio Roberto Vitello's avatar
Fabio Roberto Vitello committed
        {
#ifdef OMP
            tid = omp_get_thread_num();
            nthreads = omp_get_num_threads();
#endif
        }
        if(comlsqr.itn==1 && debugMode)
            printf("PE=%d Aprod1 OpenMP num of threads =%d from thread =%d icycle=%ld  comlsqr.itn=%d\n",myid, nthreads,tid,i,comlsqr.itn);
    long miValAstro=0;
    long miValAtt=0;
    long jstartAtt=0;
    long jstartAstro=0;
    long lset=0;
        long offLocalAstro=0;
        long offLocalAtt=0;
        long offLocalInstr=0; //Offset on Instruments
        long ixInstr=0;
        int nInstrVal=0;
        offLocalInstr=offsetInstrParam+(localAstroMax-offsetAttParam); //Offset on Instruments
        nInstrVal=nAstroPSolved+nAttP;
        offLocalAstro=comlsqr.mapStar[myid][0]*nAstroPSolved; //Offset on my mstars
        offLocalAtt=localAstroMax-offsetAttParam; //Offset on attitude
        long offLocalGlob=offsetGlobParam+(localAstroMax-offsetAttParam); //Offset on GlobP
        int nGlobVal=nAstroPSolved+nAttP+nInstrPSolved;
        jstartAstro=miValAstro-offLocalAstro;
#pragma omp for
    for(long ix=0;ix<mapNoss[myid];ix++){
        sum=0.;
/////////////////////////////////////////////////////
/// Mode 1 Astrometric Sect
if (nAstroPSolved){
    
    lset=ix*nparam;
        if(matrixIndex[multMI*ix]!= miValAstro){
            miValAstro=matrixIndex[multMI*ix];
            jstartAstro=miValAstro-offLocalAstro;
        }
        for(long jx=jstartAstro;jx<jstartAstro+nAstroPSolved;jx++){
            sum=sum+systemMatrix[lset]*vVect[jx];
            lset++;
        }
}
//////////////////////////////////////////////////////
/// Mode 1 Attitude Sect
if (nAttP){
    
        lset=ix*nparam+nAstroPSolved;
        miValAtt=matrixIndex[multMI*ix+(multMI-1)];
        for(int nax=0;nax<nAttAxes;nax++){
            jstartAtt=miValAtt+offLocalAtt+nax*nDegFreedomAtt;
            for(long inpax=jstartAtt;inpax<jstartAtt+nAttParAxis;inpax++)
                sum=sum+systemMatrix[lset]*vVect[inpax];
                lset++;
        }
}
//////////////////////////////////////////////////////
/// Mode 1 Instrument Sect
if (nInstrPSolved){
        lset=ix*nparam+nInstrVal;
        long iiVal=ix*nInstrPSolved;
        for(int inInstr=0;inInstr<nInstrPSolved;inInstr++){
            ixInstr=offLocalInstr+instrCol[iiVal+inInstr];
            sum=sum+systemMatrix[lset]*vVect[ixInstr];
            lset++;
        }
}
//////////////////////////////////////////////////////
/// Mode 1 Global sect
if (nGlobP){
        lset=ix*nparam+nGlobVal;
        for(long inGlob=offLocalGlob;inGlob<offLocalGlob+nGlobP;inGlob++){
            sum=sum+systemMatrix[lset]*vVect[inGlob];
            lset++;
        }
}
//////////////////////////////////////////////////////
        knownTerms[ix]+=sum;
} //for ix
/// Mode 1 ExtConstr
if(nEqExtConstr){
    long offExtAtt;
    long offExtAttConstr=VrIdAstroPDimMax*nAstroPSolved+startingAttColExtConstr;
    long vVIx;
    long ktIx=mapNoss[myid];
    long offExtConstr;
    for(int iexc=0;iexc<nEqExtConstr;iexc++ ){
        sum=0.0;
        offExtConstr=mapNcoeff[myid]+iexc*nOfElextObs;
#pragma omp for
        for(int j3=0;j3<numOfExtStar*nAstroPSolved;j3++)
            sum+=systemMatrix[offExtConstr+j3]*vVect[j3];
        for(int nax=0;nax<nAttAxes;nax++){
            offExtAtt=offExtConstr+numOfExtStar*nAstroPSolved+nax*numOfExtAttCol;
            vVIx=offExtAttConstr+nax*nDegFreedomAtt;
#pragma omp for
            for(int j3=0;j3<numOfExtAttCol;j3++)
                sum+=systemMatrix[offExtAtt+j3]*vVect[vVIx+j3];
Fabio Roberto Vitello's avatar
Fabio Roberto Vitello committed
            }
#pragma omp atomic
        knownTerms[ktIx+iexc]+=sum;
    }//for iexc
}
//////////////////////////////////////////////////////
/// Mode 1 BarConstr
if(nEqBarConstr){
    long offBarConstr=mapNcoeff[myid]+nOfElextObs*nEqExtConstr;
    long offBarConstrIx;
    long ktIx=mapNoss[myid]+nEqExtConstr;
    for(int iexc=0;iexc<nEqBarConstr;iexc++ ){
        sum=0.0;
        offBarConstrIx=offBarConstr+iexc*nOfElBarObs;
#pragma omp for
        for(int j3=0;j3<numOfBarStar*nAstroPSolved;j3++)
            sum+=systemMatrix[offBarConstrIx+j3]*vVect[j3];
#pragma omp atomic
        knownTerms[ktIx+iexc]+=sum;
    }//for iexc
}
//////////////////////////////////////////////////////
/// Mode 1 InstrConstr
if(nOfInstrConstr){
    long offSetInstr=0;
    long offSetInstrInc=0;
    long offSetInstrInc1=0;
    long vVix=0;
    long offvV=0;
    long offSetInstrConstr=mapNcoeff[myid]+nOfElextObs*nEqExtConstr+nOfElBarObs*nEqBarConstr;
    long offSetInstrConstr1=VrIdAstroPDimMax*nAstroPSolved+nDegFreedomAtt*nAttAxes;
    long ktIx=mapNoss[myid]+nEqExtConstr+nEqBarConstr;
    for(int i1=myid;i1<nOfInstrConstr;i1=i1+nproc){
        sum=0.0;
        offSetInstrInc=offSetInstrConstr;
        offSetInstr=0;
        for(int m=0;m<i1;m++){
            offSetInstrInc+=instrConstrIlung[m];
            offSetInstr+=instrConstrIlung[m];
        }
        offvV=mapNoss[myid]*nInstrPSolved+offSetInstr;
#pragma omp for
        for(int j3=0;j3<instrConstrIlung[i1];j3++){
            vVix=instrCol[offvV+j3];
            sum+=systemMatrix[offSetInstrInc+j3]*vVect[offSetInstrConstr1+vVix];
        }
#pragma omp atomic
         knownTerms[ktIx+i1]+=sum;
    }
}
//////////////////////////////////////////////////////
}//pragma
*ompSec+=time(NULL)-startTime;
if(comlsqr.itn<=2 &&(myid==0 || myid==nproc-1 || debugMode==1)) printf("PE=%d AprodTiming: mode=1  OmpSec=%ld\n",myid,time(NULL)-startTime);
 }
else  //mode==2
{  //if(mode
time_t startTime=time(NULL);
#pragma omp parallel private(myid,yi, sum, k, l1, l2, l, j,localSum,i,tid,nthreads) shared(mapNoss,instrCol,comlsqr,vVect,systemMatrix,matrixIndex,knownTerms)
{
int count=0;
myid=comlsqr.myid;
#ifdef OMP
	tid = omp_get_thread_num();
    nthreads = omp_get_num_threads();
#endif
/////////////////////////////////////////////////////
/// Mode 2 Astrometric Sect
if (nAstroPSolved){
    long offLocalAstro=comlsqr.mapStar[myid][0]*nAstroPSolved;
    long jstartAstro=0;
    long lset=mapForThread[tid][0]*nparam;
    for(long ix=mapForThread[tid][0];ix<mapForThread[tid][2];ix++){
        long miValAstro=0;
        if(matrixIndex[multMI*ix]!= miValAstro){
            miValAstro=matrixIndex[multMI*ix];
            jstartAstro=miValAstro-offLocalAstro;
        }
        for(long jx=jstartAstro;jx<jstartAstro+nAstroPSolved;jx++){
            localSum=systemMatrix[lset]*knownTerms[ix];
            vVect[jx]+=localSum;
            lset++;
        }//for jx
        lset+=nparam-nAstroPSolved;
    }//for ix
}
/////////////////////////////////////////////////////
} //pragma
if(comlsqr.itn<=2 &&(myid==0 || myid==nproc-1 || debugMode==1) )printf("PE=%d AprodTiming: mode=2.1.AstroP  OmpSec=%ld\n",myid,time(NULL)-startTime);
/////////////////////////////////////////////////////
/// Mode 2 Attitude Sect
if (nAttP){
    long lset;
    long mix;
    long offj=(localAstroMax-offsetAttParam);
    long vVix;
///#pragma omp for
    for(long ix=0;ix<mapNoss[myid];ix++){
        lset=nparam*ix+nAstroPSolved;
        mix=matrixIndex[multMI*ix+(multMI-1)]+offj;
        for(int ly=0;ly<nAttAxes;ly++){
            vVix=mix+ly*nDegFreedomAtt;
            for(int lx=0;lx<nAttParAxis;lx++){
                localSum=systemMatrix[lset]*knownTerms[ix];
///#pragma omp atomic
                vVect[vVix]+=localSum;
                lset++;
                vVix++;
            }//for lx
        } //for ly
    }//for ix
}
    if(comlsqr.itn<=2 &&(myid==0 || myid==nproc-1 || debugMode==1) )printf("PE=%d AprodTiming: mode=2.1.AttP  OmpSec=%ld\n",myid,time(NULL)-startTime);

/////////////////////////////////////////////////////
/// Mode 2 Instrument Sect
if(nOfInstrConstr){
    long lset;
    int offLset=nAstroPSolved+nAttP;
    long iix;
    long offj=offsetInstrParam+(localAstroMax-offsetAttParam);
    long vVix;
///#pragma omp for
    for(long ix=0;ix<mapNoss[myid];ix++){
        lset=nparam*ix+offLset;
        iix=ix*nInstrPSolved;
        for(int ly=0;ly<nInstrPSolved;ly++){
            vVix=offj+instrCol[iix+ly];
            localSum=systemMatrix[lset]*knownTerms[ix];
///#pragma omp atomic
            vVect[vVix]+=localSum;
            lset++;
        }//for ly
    }//for ix
}
if(comlsqr.itn<=2 &&(myid==0 || myid==nproc-1 || debugMode==1) )printf("PE=%d AprodTiming: mode=2.1.InstrP  OmpSec=%ld\n",myid,time(NULL)-startTime);
/////////////////////////////////////////////////////
///} //pragma
*ompSec+=time(NULL)-startTime;
if(comlsqr.itn<=2 &&(myid==0 || myid==nproc-1 || debugMode==1) )printf("PE=%d AprodTiming: mode=2.1  OmpSec=%ld\n",myid,time(NULL)-startTime);
/////////////////////////////////////////////////////
/// Mode 2 Global Sect
if(nGlobP){
    long lset;
    int offLset=nAstroPSolved+nAttP+nInstrPSolved;
    long offj=offsetGlobParam+(localAstroMax-offsetAttParam);
    long vVix;
    for(long ix=0;ix<mapNoss[myid];ix++){
        lset=nparam*ix+offLset;
        for(long ly=0;ly<nGlobP;ly++){
            vVix=offj+ly;
            localSum=systemMatrix[lset]*knownTerms[ix];
            vVect[vVix]+=localSum;
            lset++;
        }//for ly
    }//for ix
}
////////////////////////////////////////////////////
    startTime=time(NULL);
    
#pragma omp parallel private(myid,yi,localSum,tid,nthreads,i2,j2,na) shared(mapNoss,vVect,systemMatrix,knownTerms,k,j3)
{
myid=comlsqr.myid;
#ifdef OMP
    tid = omp_get_thread_num();
    nthreads = omp_get_num_threads();
#endif
    localSum=0.0;
//////////////////////////////////////////////////////
/// Mode 2 ExtConstr
if(nEqExtConstr){
    long offExtStarConstrEq;
    long off2;
    long off3;
    for(int ix=0;ix<nEqExtConstr;ix++ ){  //stars
        yi=knownTerms[mapNoss[myid]+ix];
        offExtStarConstrEq=mapNcoeff[myid]+ix*nOfElextObs;//Star offset on the row ix (all the row)
#pragma omp for
        for(long yx=0;yx<numOfExtStar;yx++){
            off3=yx*nAstroPSolved;
            off2=offExtStarConstrEq+off3;
            for(int j2=0;j2<nAstroPSolved;j2++){
                localSum= systemMatrix[off2+j2]*yi;
                vVect[j2+off3]+=localSum;
#pragma omp barrier
    } //for ix
    
    long offExtAttConstrEq;
    long offExtUnk;
    long off1=VrIdAstroPDimMax*nAstroPSolved+startingAttColExtConstr;
    for(int ix=0;ix<nEqExtConstr;ix++ ){  //att
        yi=knownTerms[mapNoss[myid]+ix];
        offExtAttConstrEq=mapNcoeff[myid]+ix*nOfElextObs; //Att offset on the row ix (all the row)
        offExtAttConstrEq+=numOfExtStar*nAstroPSolved; //Att offset inside ix row
        for(int nax=0;nax<nAttAxes;nax++){
            offExtUnk=off1+nax*nDegFreedomAtt;// Att offset for Unk array on extConstr
            off2=offExtAttConstrEq+nax*numOfExtAttCol;
#pragma omp for
            for(int jx=0;jx<numOfExtAttCol;jx++){
                localSum= systemMatrix[off2+jx]*yi;
                vVect[offExtUnk+jx]+=localSum;
Fabio Roberto Vitello's avatar
Fabio Roberto Vitello committed
            }
#pragma omp barrier
Fabio Roberto Vitello's avatar
Fabio Roberto Vitello committed
        }
    }
}
//////////////////////////////////////////////////////
/// Mode 2 BarConstr
if(nEqBarConstr){
    localSum=0.0;
    long off3;
    long off2;
    for(int ix=0;ix<nEqBarConstr;ix++ ){  //stars
        yi=knownTerms[mapNoss[myid]+nEqExtConstr+ix];
        long offBarStarConstrEq=mapNcoeff[myid]+nEqExtConstr*nOfElextObs+ix*nOfElBarObs;//Star offset on the row i2 (all the row)
#pragma omp for
        for(long yx=0;yx<numOfBarStar;yx++){
            off3=yx*nAstroPSolved;
            off2=offBarStarConstrEq+off3;
            for(int j2=0;j2<nAstroPSolved;j2++){
                localSum= systemMatrix[off2+j2]*yi;
                vVect[j2+off3]+=localSum;
        }
#pragma omp barrier
    } //for i2
}
//////////////////////////////////////////////////////
/// Mode 2 InstrConstr
if(nOfInstrConstr){
    localSum=0.0;
    int offSetInstr;
    long off1;
    long offInstrUnk=VrIdAstroPDimMax*nAstroPSolved+nAttAxes*nDegFreedomAtt;
    long off2=mapNoss[myid]+nEqExtConstr+nEqBarConstr;
    long off3=mapNcoeff[myid]+nOfElextObs*nEqExtConstr+nOfElBarObs*nEqBarConstr;
    long off4=mapNoss[myid]*nInstrPSolved;
    long off5;
    long off6;
    for(int k1=myid;k1<nOfInstrConstr;k1=k1+nproc){
        yi=knownTerms[off2+k1];
        offSetInstr=0;
        for(int m=0;m<k1;m++)
            offSetInstr+=instrConstrIlung[m];
            
        off1=off3+offSetInstr;
        off5=off4+offSetInstr;
#pragma omp for
            for(int j=0;j<instrConstrIlung[k1];j++){
                localSum=systemMatrix[off1+j]*yi;
                off6=offInstrUnk+instrCol[off5+j];
                vVect[off6]+=localSum;
} //pragma
*ompSec+=time(NULL)-startTime;
if(comlsqr.itn<=2 &&(myid==0 || myid==nproc-1 || debugMode==1)) printf("PE=%d AprodTiming: mode=2.2  OmpSec=%ld\n",myid,time(NULL)-startTime);
}// else if(mode==2)