Loading src/libnptm/cublas_calls.cpp +6 −6 Original line number Diff line number Diff line Loading @@ -143,12 +143,12 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double & cudacall(cudaMalloc<cuDoubleComplex>(&d_a_residual, m*m*sizeof(cuDoubleComplex))); cudacall(cudaMalloc<cuDoubleComplex>(&d_a_refine, m*m*sizeof(cuDoubleComplex))); // allocate memory for the temporary matrix products dcomplex *native_id = new dcomplex[m]; for (np_int i=0; i<m; i++) native_id[i] = 1; dcomplex *native_id = new dcomplex[1]; native_id[0] = 1; cuDoubleComplex *m_id = (cuDoubleComplex *) &(native_id[0]); // fill it with 1 cudacall(cudaMalloc<cuDoubleComplex>(&d_id, m*sizeof(cuDoubleComplex))); cudacall(cudaMemcpy(d_id, m_id, m*sizeof(cuDoubleComplex),cudaMemcpyHostToDevice)); // copy identity to device vector cudacall(cudaMalloc<cuDoubleComplex>(&d_id, sizeof(cuDoubleComplex))); cudacall(cudaMemcpy(d_id, m_id, sizeof(cuDoubleComplex),cudaMemcpyHostToDevice)); // copy identity to device vector delete[] native_id; // free identity vector on host } bool iteraterefine = true; Loading @@ -167,7 +167,7 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double & // Fortran-style arrays, whereas our arrays are C-style row-major. cublascall(CUZGEMM(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, m, m, &cu_mone, d_c, m, d_a, m, &cu_zero, d_a_residual, m)); // add the identity to the product cublascall(CUZAXPY(handle, m, &cu_one, d_id, 1, d_a_residual, m+1)); cublascall(CUZAXPY(handle, m, &cu_one, d_id, 0, d_a_residual, m+1)); double oldmax=0; if (refinemode >0) { np_int maxindex; Loading @@ -191,7 +191,7 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double & // multiply minus the original matrix times the new inverse matrix cublascall(CUZGEMM(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, m, m, &cu_mone, d_c, m, d_a, m, &cu_zero, d_a_residual, m)); // add the identity to the product cublascall(CUZAXPY(handle, m, &cu_one, d_id, 1, d_a_residual, m+1)); cublascall(CUZAXPY(handle, m, &cu_one, d_id, 0, d_a_residual, m+1)); if ((refinemode==2) || ((refinemode==1) && (iter == (maxiters-1)))) { // find the maximum absolute value of the residual np_int maxindex; Loading src/libnptm/lapack_calls.cpp +8 −9 Original line number Diff line number Diff line Loading @@ -92,6 +92,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl #endif np_int nn = n*n; np_int incx = 1; np_int incx0 = 0; #ifdef USE_MKL MKL_Complex16 *arr_orig = NULL; MKL_Complex16 *arr_residual = NULL; Loading @@ -108,17 +109,15 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl arr_orig = new MKL_Complex16[nn]; arr_residual = new MKL_Complex16[nn]; arr_refine = new MKL_Complex16[nn]; id = new MKL_Complex16[n]; for (np_int i=0; i<n ; i++) { id[i].real = 1; id[i].imag = 0; } id = new MKL_Complex16[1]; id[0].real = 1; id[0].imag = 0; #else arr_orig = new dcomplex[nn]; arr_residual = new dcomplex[nn]; arr_refine = new dcomplex[nn]; id = new dcomplex[n]; for (np_int i=0; i<n ; i++) id[i] = (dcomplex) 1; id = new dcomplex[1]; id[0] = (dcomplex) 1; #endif zcopy_(&nn, arr, &incx, arr_orig, &incx); } Loading Loading @@ -153,7 +152,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl // Fortran-style arrays, whereas our arrays are C-style row-major. zgemm_(&transa, &transa, &n, &n, &n, &dcmone, arr, &n, arr_orig, &n, &dczero, arr_residual, &n); np_int incy = n+1; zaxpy_(&n, &dcone, id, &incx, arr_residual, &incy); zaxpy_(&n, &dcone, id, &incx0, arr_residual, &incy); double oldmax = 0; if (refinemode >0) { np_int maxindex = izamax_(&nn, arr_residual, &incx); Loading @@ -171,7 +170,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl zaxpy_(&nn, &dcone, arr_refine, &incx, arr, &incx); // zcopy_(&nn, arr_refine, &incx, arr, &incx); zgemm_(&transa, &transa, &n, &n, &n, &dcmone, arr, &n, arr_orig, &n, &dczero, arr_residual, &n); zaxpy_(&n, &dcone, id, &incx, arr_residual, &incy); zaxpy_(&n, &dcone, id, &incx0, arr_residual, &incy); if ((refinemode==2) || ((refinemode==1) && (iter == (maxiters-1)))) { np_int maxindex = izamax_(&nn, arr_residual, &incx); #ifdef USE_MKL Loading src/libnptm/magma_calls.cpp +6 −6 Original line number Diff line number Diff line Loading @@ -132,16 +132,16 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, } // allocate memory for the identity vector on the host { dcomplex *native_id = new dcomplex[m]; for (magma_int_t i=0; i<m; i++) native_id[i] = 1; dcomplex *native_id = new dcomplex[1]; native_id[0] = 1; magmaDoubleComplex *id = (magmaDoubleComplex *) &(native_id[0]); // fill it with 1 err = magma_zmalloc(&d_id, m); err = magma_zmalloc(&d_id, 1); if (err != MAGMA_SUCCESS) { printf("Error allocating d_id\n"); exit(1); } magma_zsetvector(m, id, 1, d_id, 1, queue); // copy identity to device vector magma_zsetvector(1, id, 1, d_id, 1, queue); // copy identity to device vector delete[] native_id; // free identity vector on host } } Loading @@ -161,7 +161,7 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, // Fortran-style arrays, whereas our arrays are C-style row-major. magma_zgemm(MagmaNoTrans, MagmaNoTrans, m, m, m, magma_mone, d_a, m, d_a_orig, m, magma_zero, d_a_residual, m, queue); // add the identity to the product magma_zaxpy (m, magma_one, d_id, 1, d_a_residual, m+1, queue); magma_zaxpy (m, magma_one, d_id, 0, d_a_residual, m+1, queue); double oldmax=0; if (refinemode >0) { // find the maximum absolute value of the residual Loading @@ -184,7 +184,7 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, // multiply minus the original matrix times the new inverse matrix magma_zgemm(MagmaNoTrans, MagmaNoTrans, m, m, m, magma_mone, d_a, m, d_a_orig, m, magma_zero, d_a_residual, m, queue); // add the identity to the product magma_zaxpy (m, magma_one, d_id, 1, d_a_residual, m+1, queue); magma_zaxpy (m, magma_one, d_id, 0, d_a_residual, m+1, queue); if ((refinemode==2) || ((refinemode==1) && (iter == (maxiters-1)))) { // find the maximum absolute value of the residual magma_int_t maxindex = magma_izamax(mm, d_a_residual, 1, queue); Loading Loading
src/libnptm/cublas_calls.cpp +6 −6 Original line number Diff line number Diff line Loading @@ -143,12 +143,12 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double & cudacall(cudaMalloc<cuDoubleComplex>(&d_a_residual, m*m*sizeof(cuDoubleComplex))); cudacall(cudaMalloc<cuDoubleComplex>(&d_a_refine, m*m*sizeof(cuDoubleComplex))); // allocate memory for the temporary matrix products dcomplex *native_id = new dcomplex[m]; for (np_int i=0; i<m; i++) native_id[i] = 1; dcomplex *native_id = new dcomplex[1]; native_id[0] = 1; cuDoubleComplex *m_id = (cuDoubleComplex *) &(native_id[0]); // fill it with 1 cudacall(cudaMalloc<cuDoubleComplex>(&d_id, m*sizeof(cuDoubleComplex))); cudacall(cudaMemcpy(d_id, m_id, m*sizeof(cuDoubleComplex),cudaMemcpyHostToDevice)); // copy identity to device vector cudacall(cudaMalloc<cuDoubleComplex>(&d_id, sizeof(cuDoubleComplex))); cudacall(cudaMemcpy(d_id, m_id, sizeof(cuDoubleComplex),cudaMemcpyHostToDevice)); // copy identity to device vector delete[] native_id; // free identity vector on host } bool iteraterefine = true; Loading @@ -167,7 +167,7 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double & // Fortran-style arrays, whereas our arrays are C-style row-major. cublascall(CUZGEMM(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, m, m, &cu_mone, d_c, m, d_a, m, &cu_zero, d_a_residual, m)); // add the identity to the product cublascall(CUZAXPY(handle, m, &cu_one, d_id, 1, d_a_residual, m+1)); cublascall(CUZAXPY(handle, m, &cu_one, d_id, 0, d_a_residual, m+1)); double oldmax=0; if (refinemode >0) { np_int maxindex; Loading @@ -191,7 +191,7 @@ void cublas_zinvert_and_refine(dcomplex **mat, np_int n, int &maxiters, double & // multiply minus the original matrix times the new inverse matrix cublascall(CUZGEMM(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, m, m, &cu_mone, d_c, m, d_a, m, &cu_zero, d_a_residual, m)); // add the identity to the product cublascall(CUZAXPY(handle, m, &cu_one, d_id, 1, d_a_residual, m+1)); cublascall(CUZAXPY(handle, m, &cu_one, d_id, 0, d_a_residual, m+1)); if ((refinemode==2) || ((refinemode==1) && (iter == (maxiters-1)))) { // find the maximum absolute value of the residual np_int maxindex; Loading
src/libnptm/lapack_calls.cpp +8 −9 Original line number Diff line number Diff line Loading @@ -92,6 +92,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl #endif np_int nn = n*n; np_int incx = 1; np_int incx0 = 0; #ifdef USE_MKL MKL_Complex16 *arr_orig = NULL; MKL_Complex16 *arr_residual = NULL; Loading @@ -108,17 +109,15 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl arr_orig = new MKL_Complex16[nn]; arr_residual = new MKL_Complex16[nn]; arr_refine = new MKL_Complex16[nn]; id = new MKL_Complex16[n]; for (np_int i=0; i<n ; i++) { id[i].real = 1; id[i].imag = 0; } id = new MKL_Complex16[1]; id[0].real = 1; id[0].imag = 0; #else arr_orig = new dcomplex[nn]; arr_residual = new dcomplex[nn]; arr_refine = new dcomplex[nn]; id = new dcomplex[n]; for (np_int i=0; i<n ; i++) id[i] = (dcomplex) 1; id = new dcomplex[1]; id[0] = (dcomplex) 1; #endif zcopy_(&nn, arr, &incx, arr_orig, &incx); } Loading Loading @@ -153,7 +152,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl // Fortran-style arrays, whereas our arrays are C-style row-major. zgemm_(&transa, &transa, &n, &n, &n, &dcmone, arr, &n, arr_orig, &n, &dczero, arr_residual, &n); np_int incy = n+1; zaxpy_(&n, &dcone, id, &incx, arr_residual, &incy); zaxpy_(&n, &dcone, id, &incx0, arr_residual, &incy); double oldmax = 0; if (refinemode >0) { np_int maxindex = izamax_(&nn, arr_residual, &incx); Loading @@ -171,7 +170,7 @@ void zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, doubl zaxpy_(&nn, &dcone, arr_refine, &incx, arr, &incx); // zcopy_(&nn, arr_refine, &incx, arr, &incx); zgemm_(&transa, &transa, &n, &n, &n, &dcmone, arr, &n, arr_orig, &n, &dczero, arr_residual, &n); zaxpy_(&n, &dcone, id, &incx, arr_residual, &incy); zaxpy_(&n, &dcone, id, &incx0, arr_residual, &incy); if ((refinemode==2) || ((refinemode==1) && (iter == (maxiters-1)))) { np_int maxindex = izamax_(&nn, arr_residual, &incx); #ifdef USE_MKL Loading
src/libnptm/magma_calls.cpp +6 −6 Original line number Diff line number Diff line Loading @@ -132,16 +132,16 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, } // allocate memory for the identity vector on the host { dcomplex *native_id = new dcomplex[m]; for (magma_int_t i=0; i<m; i++) native_id[i] = 1; dcomplex *native_id = new dcomplex[1]; native_id[0] = 1; magmaDoubleComplex *id = (magmaDoubleComplex *) &(native_id[0]); // fill it with 1 err = magma_zmalloc(&d_id, m); err = magma_zmalloc(&d_id, 1); if (err != MAGMA_SUCCESS) { printf("Error allocating d_id\n"); exit(1); } magma_zsetvector(m, id, 1, d_id, 1, queue); // copy identity to device vector magma_zsetvector(1, id, 1, d_id, 1, queue); // copy identity to device vector delete[] native_id; // free identity vector on host } } Loading @@ -161,7 +161,7 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, // Fortran-style arrays, whereas our arrays are C-style row-major. magma_zgemm(MagmaNoTrans, MagmaNoTrans, m, m, m, magma_mone, d_a, m, d_a_orig, m, magma_zero, d_a_residual, m, queue); // add the identity to the product magma_zaxpy (m, magma_one, d_id, 1, d_a_residual, m+1, queue); magma_zaxpy (m, magma_one, d_id, 0, d_a_residual, m+1, queue); double oldmax=0; if (refinemode >0) { // find the maximum absolute value of the residual Loading @@ -184,7 +184,7 @@ void magma_zinvert_and_refine(dcomplex **mat, np_int n, int &jer, int &maxiters, // multiply minus the original matrix times the new inverse matrix magma_zgemm(MagmaNoTrans, MagmaNoTrans, m, m, m, magma_mone, d_a, m, d_a_orig, m, magma_zero, d_a_residual, m, queue); // add the identity to the product magma_zaxpy (m, magma_one, d_id, 1, d_a_residual, m+1, queue); magma_zaxpy (m, magma_one, d_id, 0, d_a_residual, m+1, queue); if ((refinemode==2) || ((refinemode==1) && (iter == (maxiters-1)))) { // find the maximum absolute value of the residual magma_int_t maxindex = magma_izamax(mm, d_a_residual, 1, queue); Loading