librsb-1.2.0-rc7/ 0000755 0001750 0001750 00000000000 13115023673 010462 5 0000000 0000000 librsb-1.2.0-rc7/rsb_krnl.h 0000644 0001750 0001750 00000077151 13115011301 012363 0000000 0000000 /* @cond INNERDOC */
/*!
@file
@brief Matrix type dispatching code, for each matrix operation.
*/
/*
Copyright (C) 2008-2017 Michele Martone
This file is part of librsb.
librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see <http://www.gnu.org/licenses/>.
*/
/*
The code in this file was generated automatically by an M4 script.
It is not meant to be used as an API (Application Programming Interface).
p.s.: right now, only row major matrix access is considered.
*/
#ifndef RSB_DISPATCH_H_INCLUDED
#define RSB_DISPATCH_H_INCLUDED
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/*!
@file
@brief
Performance kernels dispatching code, for each type, submatrix size, operation.
But for block compressed sparse stripes format.
Kernels unrolled, with no loops, for only user-specified blockings.
*/
/*
Copyright (C) 2008-2017 Michele Martone
This file is part of librsb.
librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see <http://www.gnu.org/licenses/>.
*/
/*
The code in this file was generated automatically by an M4 script.
It is not meant to be used as an API (Application Programming Interface).
p.s.: right now, only row major matrix access is considered.
*/
#include "rsb_common.h"
#include "rsb_krnl_bcss_spmv_u.h" /* uhm */
#include "rsb_krnl_bcss_spsv_u.h" /* uhm */
#include "rsb_krnl_bcss_misc_u.h" /* uhm */
/*
 * RSB_BCSR_GET_NEXT_BLOCK_POINTER: statement macro that moves a block-by-block
 * iteration over mtxAp forward by one block.
 *
 * The macro does not declare its own state: it reads and updates the
 * identifiers _i, _j, _k and _lastk, which must already exist in the
 * expanding scope (RSB_BCSR_GET_FIRST_BLOCK_POINTER declares them and then
 * expands this macro).
 *
 * In order, one expansion:
 *  - advances _i while bpntr[_i] == bpntr[_i+1], reloading _k from bpntr[_i];
 *  - loads _j from bindx[_k] and saves _k into _lastk;
 *  - stores _i and _j into BLOCKROWVAR and BLOCKCOLUMNVAR, and
 *    BLOCKROWSPAR*_i and BLOCKCOLSPAR*_j into ROWVAR and COLVAR;
 *  - adds el_size*BLOCKROWSPAR*BLOCKCOLSPAR to BP;
 *  - increments _k, and increments _i when _k has reached bpntr[_i+1].
 *
 * Usage caveats visible from the expansion:
 *  - it expands to several statements that are not wrapped in do{}while(0),
 *    so it cannot serve as the body of an unbraced if/else/loop;
 *  - mtxAp, BLOCKROWSPAR and BLOCKCOLSPAR are evaluated more than once;
 *  - BP is used unparenthesized as the left operand of +=.
 *
 * NOTE(review): BP is advanced on every expansion, including the one done by
 * RSB_BCSR_GET_FIRST_BLOCK_POINTER right after it sets BP to VA; confirm
 * whether callers expect BP to point past the block that ROWVAR/COLVAR
 * describe.
 * NOTE(review): the empty-row skip loop has no upper bound on _i; presumably
 * callers test RSB_BCSR_GOT_LAST_BLOCK_POINTER before expanding this again --
 * verify against the callers.
 */
#define RSB_BCSR_GET_NEXT_BLOCK_POINTER(BP,mtxAp,ROWVAR,COLVAR,BLOCKROWSPAR,BLOCKCOLSPAR,BLOCKROWVAR,BLOCKCOLUMNVAR) \
/* \
* *input* \
* mtxAp should be a valid rsb_mtx_t structure pointer \
* BLOCKROWSPAR should be set to the rows count of this block \
* BLOCKCOLSPAR should be set to the column count of this block \
* *output* \
* ROWVAR will be set to the base row of this block \
* COLVAR will be set to the base column of this block \
* BP will be set to the current block pointer \
* */ \
while( (mtxAp)->bpntr[_i] == (mtxAp)->bpntr[_i+1] ) /* skipping empty rows */ \
{++_i;_k=(mtxAp)->bpntr[_i];} /* _k is the first block index for the current row of blocks */ \
_j=(mtxAp)->bindx[_k]; /* the current block column index */ \
_lastk=_k; \
(BLOCKROWVAR)=_i; \
(BLOCKCOLUMNVAR)=_j; \
(ROWVAR)=(BLOCKROWSPAR)*_i; /* _i is the current block row index */ \
(COLVAR)=(BLOCKCOLSPAR)*_j; /* the current block column index */ \
BP+=(mtxAp)->options->el_size*(BLOCKROWSPAR)*(BLOCKCOLSPAR); \
_k++; /* for the future macro calls */ \
if( _k >= (mtxAp)->bpntr[_i+1] )++_i; \
;
/*
 * RSB_BCSR_GET_FIRST_BLOCK_POINTER: statement macro that starts a
 * block-by-block iteration over mtxAp.
 *
 * It declares the iteration state (int _i, _j, _k, _lastk, all zero) directly
 * in the expanding scope, so it can be expanded at most once per scope and
 * only where a declaration is allowed.
 * It then sets BLOCKROWSVAR and BLOCKCOLSVAR from the first rpntr / cpntr
 * interval, points BP at mtxAp->VA, and expands
 * RSB_BCSR_GET_NEXT_BLOCK_POINTER with the same arguments to fill in
 * ROWVAR, COLVAR, BLOCKROWVAR and BLOCKCOLUMNVAR.
 *
 * Only rpntr[0..1] and cpntr[0..1] are consulted, so the same block
 * dimensions are used for the whole iteration.
 */
#define RSB_BCSR_GET_FIRST_BLOCK_POINTER(BP,mtxAp,ROWVAR,COLVAR,BLOCKROWSVAR,BLOCKCOLSVAR,BLOCKROWVAR,BLOCKCOLUMNVAR) \
int _i=0,_j=0,_k=0,_lastk=0; \
(BLOCKROWSVAR)=(mtxAp)->rpntr[1]-(mtxAp)->rpntr[0]; /* rows per block: extent of the first rpntr interval */ \
(BLOCKCOLSVAR)=(mtxAp)->cpntr[1]-(mtxAp)->cpntr[0]; /* columns per block: extent of the first cpntr interval */ \
(BP)=(mtxAp)->VA; \
RSB_BCSR_GET_NEXT_BLOCK_POINTER(BP,mtxAp,ROWVAR,COLVAR,BLOCKROWSVAR,BLOCKCOLSVAR,BLOCKROWVAR,BLOCKCOLUMNVAR)
/*
 * Expression macro: non-zero once _lastk (the block index recorded by the
 * most recent RSB_BCSR_GET_NEXT_BLOCK_POINTER expansion) is greater than or
 * equal to mtxAp->block_count.
 * It reads _lastk from the expanding scope, so it is only usable where
 * RSB_BCSR_GET_FIRST_BLOCK_POINTER has already been expanded.
 */
#define RSB_BCSR_GOT_LAST_BLOCK_POINTER(mtxAp) ( _lastk >= (mtxAp)->block_count )
/*
 * Benchmark thresholds; the commented-out number in each definition is a
 * previously used value.
 * NOTE(review): presumably the minimum accumulated time (in seconds) and the
 * minimum number of repetitions for a benchmark to be considered complete --
 * confirm against the benchmark loops in the implementation file.
 */
#define RSB_BENCHMARK_MIN_SECONDS /*0.5*/1.0
#define RSB_BENCHMARK_MIN_RUNS /*5*/10
/*
 * Per-operation dispatcher entry points (per the file header: matrix type
 * dispatching code, one function for each matrix operation).
 *
 * Common parameters, as declared:
 *  mtxAp   the matrix operated on; const for every operation except
 *          rsb__do_scale, which takes it non-const.
 *  transA  transposition selector of type rsb_trans_t.
 *  rhs     input operand of the spmv / spsv operations (read-only).
 *  out     output operand of the spmv / spsv operations; rhs and out are
 *          both restrict-qualified, so they must not refer to overlapping
 *          storage.
 *  alphap  read-only pointer taken by the uxua / sxsa / sxsx variants;
 *          presumably a scalar of the matrix numerical type -- TODO confirm.
 *  incx, incy  taken by the sasa / sxsa / sxsx variants; presumably the
 *          element strides of rhs and out -- TODO confirm.
 *  row_sums, scale_factors  untyped (void *) buffers of the infty_norm /
 *          rowssums and scale operations.
 *
 * Every function returns an rsb_err_t status.
 * The meaning of the four-letter variant suffixes (uaua, uauz, ...) is not
 * documented in this header.
 */
rsb_err_t rsb__do_spmv_uaua(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_spmv_uauz(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_spmv_uxua(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,const rsb_trans_t transA);
rsb_err_t rsb__do_spmv_unua(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_spmv_sasa(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_spsv_uxua(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_spmv_sxsa(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_spsv_sxsx(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_infty_norm(const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
rsb_err_t rsb__do_rowssums(const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
rsb_err_t rsb__do_scale(struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,const void * scale_factors);
/*
 * Testing and timing companions of the dispatcher entry points, one pair per
 * operation:
 *
 *  rsb__<op>_testing   declared only when RSB_WANT_KERNELS_DEBUG is defined;
 *                      takes exactly the parameter list of rsb__do_<op>.
 *  rsb_do_time_<op>    always declared; takes a leading double * elapsed_time
 *                      followed by the parameter list of rsb__do_<op>.
 *                      Presumably elapsed_time receives the measured run time
 *                      -- TODO confirm unit and semantics in the
 *                      implementation.
 *
 * Note that the timing functions use the prefix rsb_do_ (single underscore
 * after rsb), unlike the rsb__ prefix of the other declarations in this
 * header.
 */
#ifdef RSB_WANT_KERNELS_DEBUG
rsb_err_t rsb__spmv_uaua_testing(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#endif /* RSB_WANT_KERNELS_DEBUG */
rsb_err_t rsb_do_time_spmv_uaua(double * elapsed_time, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#ifdef RSB_WANT_KERNELS_DEBUG
rsb_err_t rsb__spmv_uauz_testing(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#endif /* RSB_WANT_KERNELS_DEBUG */
rsb_err_t rsb_do_time_spmv_uauz(double * elapsed_time, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#ifdef RSB_WANT_KERNELS_DEBUG
rsb_err_t rsb__spmv_uxua_testing(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,const rsb_trans_t transA);
#endif /* RSB_WANT_KERNELS_DEBUG */
rsb_err_t rsb_do_time_spmv_uxua(double * elapsed_time, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,const rsb_trans_t transA);
#ifdef RSB_WANT_KERNELS_DEBUG
rsb_err_t rsb__spmv_unua_testing(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#endif /* RSB_WANT_KERNELS_DEBUG */
rsb_err_t rsb_do_time_spmv_unua(double * elapsed_time, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#ifdef RSB_WANT_KERNELS_DEBUG
rsb_err_t rsb__spmv_sasa_testing(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
#endif /* RSB_WANT_KERNELS_DEBUG */
rsb_err_t rsb_do_time_spmv_sasa(double * elapsed_time, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
#ifdef RSB_WANT_KERNELS_DEBUG
rsb_err_t rsb__spsv_uxua_testing(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#endif /* RSB_WANT_KERNELS_DEBUG */
rsb_err_t rsb_do_time_spsv_uxua(double * elapsed_time, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#ifdef RSB_WANT_KERNELS_DEBUG
rsb_err_t rsb__spmv_sxsa_testing(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
#endif /* RSB_WANT_KERNELS_DEBUG */
rsb_err_t rsb_do_time_spmv_sxsa(double * elapsed_time, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
#ifdef RSB_WANT_KERNELS_DEBUG
rsb_err_t rsb__spsv_sxsx_testing(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
#endif /* RSB_WANT_KERNELS_DEBUG */
rsb_err_t rsb_do_time_spsv_sxsx(double * elapsed_time, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
#ifdef RSB_WANT_KERNELS_DEBUG
rsb_err_t rsb__infty_norm_testing(const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
#endif /* RSB_WANT_KERNELS_DEBUG */
rsb_err_t rsb_do_time_infty_norm(double * elapsed_time, const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
#ifdef RSB_WANT_KERNELS_DEBUG
rsb_err_t rsb__rowssums_testing(const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
#endif /* RSB_WANT_KERNELS_DEBUG */
rsb_err_t rsb_do_time_rowssums(double * elapsed_time, const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
#ifdef RSB_WANT_KERNELS_DEBUG
rsb_err_t rsb__scale_testing(struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,const void * scale_factors);
#endif /* RSB_WANT_KERNELS_DEBUG */
rsb_err_t rsb_do_time_scale(double * elapsed_time, struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,const void * scale_factors);
/*
 * Per-type benchmark entry points.
 * For each of the eleven operations and for each of the four numerical type
 * names appearing in the identifiers (double, float, float_complex,
 * double_complex) two functions are declared:
 *
 *  rsb__do_fullrangebenchmark_<type>_<op>(VA, IA, JA, nnz, rows, cols, mpi, flags)
 *      takes raw arrays and sizes instead of an assembled matrix, plus a
 *      struct rsb_mop_performance_info_t * mpi and rsb_flags_t flags.
 *      Presumably VA / IA / JA are the values and row / column indices of
 *      nnz coordinate entries and mpi receives the results -- TODO confirm.
 *
 *  rsb__do_benchmark_<type>_<op>(total_elapsed_time, m_flops, mtxAp, ...)
 *      takes two double * parameters followed by the parameter list of the
 *      corresponding rsb__do_<op> dispatcher.
 *      Presumably both double * parameters are outputs -- TODO confirm.
 *
 * Every function returns an rsb_err_t status.
 */
rsb_err_t rsb__do_fullrangebenchmark_double_spmv_uaua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_spmv_uaua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_spmv_uaua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_spmv_uaua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_complex_spmv_uaua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_complex_spmv_uaua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_complex_spmv_uaua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_complex_spmv_uaua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_spmv_uauz(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_spmv_uauz(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_spmv_uauz(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_spmv_uauz(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_complex_spmv_uauz(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_complex_spmv_uauz(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_complex_spmv_uauz(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_complex_spmv_uauz(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_spmv_uxua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_spmv_uxua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_spmv_uxua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_spmv_uxua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_complex_spmv_uxua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_complex_spmv_uxua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_complex_spmv_uxua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_complex_spmv_uxua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_spmv_unua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_spmv_unua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_spmv_unua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_spmv_unua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_complex_spmv_unua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_complex_spmv_unua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_complex_spmv_unua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_complex_spmv_unua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_spmv_sasa(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_spmv_sasa(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_spmv_sasa(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_spmv_sasa(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_complex_spmv_sasa(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_complex_spmv_sasa(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_complex_spmv_sasa(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_complex_spmv_sasa(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_spsv_uxua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_spsv_uxua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_spsv_uxua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_spsv_uxua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_complex_spsv_uxua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_complex_spsv_uxua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_complex_spsv_uxua(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_complex_spsv_uxua(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_spmv_sxsa(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_spmv_sxsa(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_spmv_sxsa(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_spmv_sxsa(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_complex_spmv_sxsa(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_complex_spmv_sxsa(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_complex_spmv_sxsa(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_complex_spmv_sxsa(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_spsv_sxsx(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_spsv_sxsx(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_spsv_sxsx(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_spsv_sxsx(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_float_complex_spsv_sxsx(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_complex_spsv_sxsx(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_complex_spsv_sxsx(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_complex_spsv_sxsx(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
rsb_err_t rsb__do_fullrangebenchmark_double_infty_norm(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_infty_norm(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
rsb_err_t rsb__do_fullrangebenchmark_float_infty_norm(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_infty_norm(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
rsb_err_t rsb__do_fullrangebenchmark_float_complex_infty_norm(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_complex_infty_norm(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
rsb_err_t rsb__do_fullrangebenchmark_double_complex_infty_norm(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_complex_infty_norm(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
rsb_err_t rsb__do_fullrangebenchmark_double_rowssums(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_rowssums(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
rsb_err_t rsb__do_fullrangebenchmark_float_rowssums(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_rowssums(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
rsb_err_t rsb__do_fullrangebenchmark_float_complex_rowssums(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_complex_rowssums(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
rsb_err_t rsb__do_fullrangebenchmark_double_complex_rowssums(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_complex_rowssums(double * total_elapsed_time, double * m_flops, const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
rsb_err_t rsb__do_fullrangebenchmark_double_scale(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_scale(double * total_elapsed_time, double * m_flops, struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,const void * scale_factors);
rsb_err_t rsb__do_fullrangebenchmark_float_scale(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_scale(double * total_elapsed_time, double * m_flops, struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,const void * scale_factors);
rsb_err_t rsb__do_fullrangebenchmark_float_complex_scale(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_float_complex_scale(double * total_elapsed_time, double * m_flops, struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,const void * scale_factors);
rsb_err_t rsb__do_fullrangebenchmark_double_complex_scale(void *VA, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, rsb_nnz_idx_t nnz, rsb_coo_idx_t rows, rsb_coo_idx_t cols, struct rsb_mop_performance_info_t * mpi, rsb_flags_t flags);
rsb_err_t rsb__do_benchmark_double_complex_scale(double * total_elapsed_time, double * m_flops, struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,const void * scale_factors);
/*
 * Per-operation performance estimators, plus compiled-out legacy prototypes.
 *
 *  rsb__do_<op>_with_macros_vbr   every one of these prototypes sits inside
 *                                 #if 0 ... #endif, so none is visible to the
 *                                 compiler.
 *  rsb__estimate_mflops_per_op_<op>(mtxAp)
 *                                 returns a double computed from a read-only
 *                                 matrix; presumably the estimated number of
 *                                 millions of floating point operations for
 *                                 one run of <op> -- TODO confirm.
 *  rsb__dump_performance_array(an, array)
 *                                 takes a read-only string and a read-only
 *                                 array of double; presumably an is the name
 *                                 under which array is printed -- TODO
 *                                 confirm. The array length is not passed.
 */
#if 0
rsb_err_t rsb__do_spmv_uaua_with_macros_vbr(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#endif /* 0 */
double rsb__estimate_mflops_per_op_spmv_uaua(const struct rsb_mtx_t * mtxAp);
#if 0
rsb_err_t rsb__do_spmv_uauz_with_macros_vbr(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#endif /* 0 */
double rsb__estimate_mflops_per_op_spmv_uauz(const struct rsb_mtx_t * mtxAp);
#if 0
rsb_err_t rsb__do_spmv_uxua_with_macros_vbr(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,const rsb_trans_t transA);
#endif /* 0 */
double rsb__estimate_mflops_per_op_spmv_uxua(const struct rsb_mtx_t * mtxAp);
#if 0
rsb_err_t rsb__do_spmv_unua_with_macros_vbr(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#endif /* 0 */
double rsb__estimate_mflops_per_op_spmv_unua(const struct rsb_mtx_t * mtxAp);
#if 0
rsb_err_t rsb__do_spmv_sasa_with_macros_vbr(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
#endif /* 0 */
double rsb__estimate_mflops_per_op_spmv_sasa(const struct rsb_mtx_t * mtxAp);
#if 0
rsb_err_t rsb__do_spsv_uxua_with_macros_vbr(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const rsb_trans_t transA);
#endif /* 0 */
double rsb__estimate_mflops_per_op_spsv_uxua(const struct rsb_mtx_t * mtxAp);
#if 0
rsb_err_t rsb__do_spmv_sxsa_with_macros_vbr(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
#endif /* 0 */
double rsb__estimate_mflops_per_op_spmv_sxsa(const struct rsb_mtx_t * mtxAp);
#if 0
rsb_err_t rsb__do_spsv_sxsx_with_macros_vbr(const struct rsb_mtx_t * mtxAp,const void * restrict rhs, void * restrict out,const void * alphap,rsb_coo_idx_t incx, rsb_coo_idx_t incy,const rsb_trans_t transA);
#endif /* 0 */
double rsb__estimate_mflops_per_op_spsv_sxsx(const struct rsb_mtx_t * mtxAp);
#if 0
rsb_err_t rsb__do_infty_norm_with_macros_vbr(const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
#endif /* 0 */
double rsb__estimate_mflops_per_op_infty_norm(const struct rsb_mtx_t * mtxAp);
#if 0
rsb_err_t rsb__do_rowssums_with_macros_vbr(const struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,void * row_sums);
#endif /* 0 */
double rsb__estimate_mflops_per_op_rowssums(const struct rsb_mtx_t * mtxAp);
#if 0
rsb_err_t rsb__do_scale_with_macros_vbr(struct rsb_mtx_t * mtxAp,const rsb_trans_t transA,const void * scale_factors);
#endif /* 0 */
double rsb__estimate_mflops_per_op_scale(const struct rsb_mtx_t * mtxAp);
rsb_err_t rsb__dump_performance_array(const char * an, const double*array);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* RSB_DISPATCH_H_INCLUDED */
/*!
@file
@brief ...
*/
/* @endcond */
librsb-1.2.0-rc7/rsb_mmio.h 0000644 0001750 0001750 00000012345 12520763126 012372 0000000 0000000 /*
Copyright (C) 2008-2015 Michele Martone
This file is part of librsb.
librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see .
*/
/* @cond INNERDOC */
/**
* @file
* @brief
* Matrix Market I/O library for ANSI C.
* See http://math.nist.gov/MatrixMarket for details.
* @author Michele Martone
*/
#ifndef MM_IO_H_INCLUDED
#define MM_IO_H_INCLUDED
#include "rsb.h" /* just for some type compatibility */
#define MM_MAX_LINE_LENGTH 1025
#define MatrixMarketBanner "%%MatrixMarket"
#define MM_MAX_TOKEN_LENGTH 64
typedef char MM_typecode[];
char *rsb__mm_typecode_to_str(MM_typecode matcode);
int rsb__mm_read_banner(FILE *f, FILE * ngzfd, MM_typecode *matcode);
int rsb__mm_read_mtx_crd_size(FILE *f, FILE * ngzfd, int *M, int *N, int *nz);
int rsb__mm_read_mtx_array_size(FILE *f, FILE * ngzfd, int *M, int *N);
int rsb__mm_write_banner(FILE *f, MM_typecode matcode);
int rsb__mm_write_mtx_crd_size(FILE *f, int M, int N, int nz);
int rsb__mm_write_mtx_array_size(FILE *f, int M, int N);
/********************* MM_typecode query functions ***************************/
/*
 * Each query macro compares one character of the typecode with a fixed letter
 * and evaluates to nonzero on a match:
 *   [0] 'M'                     -> matrix
 *   [1] 'C' / 'A'               -> coordinate (sparse) / array (dense)
 *   [2] 'C' / 'R' / 'P' / 'I'   -> complex / real / pattern / integer
 *   [3] 'S' / 'G' / 'K' / 'H'   -> symmetric / general / skew / hermitian
 * rsb_mm_is_sparse is the same test as rsb_mm_is_coordinate, and
 * rsb_mm_is_dense the same test as rsb_mm_is_array.
 * The argument is indexed directly (it is not dereferenced first).
 */
#define rsb_mm_is_matrix(typecode) ((typecode)[0]=='M')
#define rsb_mm_is_sparse(typecode) ((typecode)[1]=='C')
#define rsb_mm_is_coordinate(typecode)((typecode)[1]=='C')
#define rsb_mm_is_dense(typecode) ((typecode)[1]=='A')
#define rsb_mm_is_array(typecode) ((typecode)[1]=='A')
#define rsb_mm_is_complex(typecode) ((typecode)[2]=='C')
#define rsb_mm_is_real(typecode) ((typecode)[2]=='R')
#define rsb_mm_is_pattern(typecode) ((typecode)[2]=='P')
#define rsb_mm_is_integer(typecode) ((typecode)[2]=='I')
#define rsb_mm_is_symmetric(typecode)((typecode)[3]=='S')
#define rsb_mm_is_general(typecode) ((typecode)[3]=='G')
#define rsb_mm_is_skew(typecode) ((typecode)[3]=='K')
#define rsb_mm_is_hermitian(typecode)((typecode)[3]=='H')
int rsb__mm_is_valid(MM_typecode matcode); /* too complex for a macro */
/********************* MM_typecode modify functions ***************************/
/*
 * Setter macros. Unlike the query macros, these take a POINTER to the
 * typecode array: the argument is dereferenced and then indexed.
 * The argument is parenthesized before dereferencing, so that expressions
 * such as `p + 1` address the intended typecode rather than `*p + 1`.
 * Each macro evaluates to the character stored.
 * rsb_mm_clear_typecode sets positions 0..2 to ' ' and position 3 to 'G';
 * it evaluates its argument four times, so avoid arguments with side effects.
 */
#define rsb_mm_set_matrix(typecode) ((*(typecode))[0]='M')
#define rsb_mm_set_coordinate(typecode) ((*(typecode))[1]='C')
#define rsb_mm_set_array(typecode) ((*(typecode))[1]='A')
#define rsb_mm_set_dense(typecode) rsb_mm_set_array(typecode)
#define rsb_mm_set_sparse(typecode) rsb_mm_set_coordinate(typecode)
#define rsb_mm_set_complex(typecode) ((*(typecode))[2]='C')
#define rsb_mm_set_real(typecode) ((*(typecode))[2]='R')
#define rsb_mm_set_pattern(typecode) ((*(typecode))[2]='P')
#define rsb_mm_set_integer(typecode) ((*(typecode))[2]='I')
#define rsb_mm_set_symmetric(typecode) ((*(typecode))[3]='S')
#define rsb_mm_set_general(typecode) ((*(typecode))[3]='G')
#define rsb_mm_set_skew(typecode) ((*(typecode))[3]='K')
#define rsb_mm_set_hermitian(typecode) ((*(typecode))[3]='H')
#define rsb_mm_clear_typecode(typecode) ((*(typecode))[0]=(*(typecode))[1]= \
(*(typecode))[2]=' ',(*(typecode))[3]='G')
#define rsb_mm_initialize_typecode(typecode) rsb_mm_clear_typecode(typecode)
/********************* Matrix Market error codes ***************************/
#define MM_COULD_NOT_READ_FILE 11
#define MM_PREMATURE_EOF 12
#define MM_NOT_MTX 13
#define MM_NO_HEADER 14
#define MM_UNSUPPORTED_TYPE 15
#define MM_LINE_TOO_LONG 16
#define MM_COULD_NOT_WRITE_FILE 17
/******************** Matrix Market internal definitions ********************
MM_matrix_typecode: 4-character sequence

                   object     sparse/    data        storage
                              dense      type        scheme
string position:   [0]        [1]        [2]         [3]
Matrix typecode:   M(atrix)   C(oord)    R(eal)      G(eneral)
                              A(rray)    C(omplex)   H(ermitian)
                                         P(attern)   S(ymmetric)
                                         I(nteger)   K (skew-symmetric)
***********************************************************************/
#define MM_MTX_STR "matrix"
#define MM_ARRAY_STR "array"
#define MM_DENSE_STR "array"
#define MM_COORDINATE_STR "coordinate"
#define MM_SPARSE_STR "coordinate"
#define MM_COMPLEX_STR "complex"
#define MM_REAL_STR "real"
#define MM_INT_STR "integer"
#define MM_GENERAL_STR "general"
#define MM_SYMM_STR "symmetric"
#define MM_HERM_STR "hermitian"
#define MM_SKEW_STR "skew-symmetric"
#define MM_PATTERN_STR "pattern"
/* high level routines */
int rsb_mm_write_mtx_crd(char fname[], int M, int N, int nz, int IA[], int JA[],
double VA[], MM_typecode matcode);
int rsb_mm_read_mtx_crd_data(FILE *f, int M, int N, int nz, rsb_coo_idx_t IA[], rsb_coo_idx_t JA[],
double VA[], MM_typecode matcode);
int rsb__mm_read_mtx_crd_entry(FILE *f, rsb_coo_idx_t * IA, rsb_coo_idx_t * JA, double *real, double *img, MM_typecode matcode);
#endif /* MM_IO_H_INCLUDED */
/* @endcond */
librsb-1.2.0-rc7/ot-spmv_uauz.c 0000644 0001750 0001750 00000000032 12537115176 013221 0000000 0000000 static int f(){return 0;}
librsb-1.2.0-rc7/ot-spmv_unua.c 0000644 0001750 0001750 00000000032 12537115176 013205 0000000 0000000 static int f(){return 0;}
librsb-1.2.0-rc7/rsb_rec.h 0000644 0001750 0001750 00000007163 12520763126 012204 0000000 0000000 /*
Copyright (C) 2008-2015 Michele Martone
This file is part of librsb.
librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see .
*/
/* @cond INNERDOC */
/**
* @file
* @brief Recursion handling code
* @author Michele Martone
* */
#ifndef RSB_REC_H_INCLUDED
#define RSB_REC_H_INCLUDED
#include "rsb_internals.h"
/*
 * Operation identifiers (marked temporary/experimental by the author).
 * Values are explicit and listed here in ascending order.
 */
enum rsb_op_t
{
	rsb_op_nop     = 0,
	rsb_op_spmv    = 1,
	rsb_op_spsvl   = 2,
	rsb_op_spsvlt  = 3,
	rsb_op_spsvu   = 4,
	rsb_op_spsvut  = 5,
	rsb_op_get_csr = 6
}; /* FIXME : temporary, experimental */
/*
 * RSB_PTR_SHIFT(DPTR,MPTR,PPTR,TC): relocate DPTR by the byte distance that
 * separates PPTR from MPTR, i.e. DPTR = DPTR + (PPTR - MPTR) computed on
 * rsb_byte_t pointers. TC is a parenthesized cast applied to the result
 * before it is assigned back to DPTR.
 * The macro expands to a complete if/else statement (not an expression);
 * DPTR, MPTR and PPTR are evaluated more than once.
 */
#define RSB_PTR_SHIFT(DPTR,MPTR,PPTR,TC) /* FIXME: temporarily here */ \
	if( (PPTR) <= (MPTR) ) \
	{ \
		/* destination is not above the origin: step DPTR backwards */ \
		(DPTR) = TC ( ((const rsb_byte_t *)(DPTR)) - ( ((const rsb_byte_t *)(MPTR)) - ((const rsb_byte_t *)(PPTR)) ) ); \
	} \
	else \
	{ \
		/* destination is above the origin: step DPTR forwards */ \
		(DPTR) = TC ( ((const rsb_byte_t *)(DPTR)) + ( ((const rsb_byte_t *)(PPTR)) - ((const rsb_byte_t *)(MPTR)) ) ); \
	}
/* NOTE(review): the value 4 does not satisfy the strict upper bound stated in the trailing comment; either the bound or the value looks stale -- confirm. */
#define RSB_TMP_OVERALLOC_MTX 4 /* 1 < RSB_TMP_OVERALLOC_MTX < 4; a temporary measure */
rsb_err_t rsb__init_set_quad_submatrices_info(const struct rsb_mtx_partitioning_info_t * pinfop, struct rsb_mtx_t ** matrices, rsb_nnz_idx_t uuk, rsb_nnz_idx_t mk, rsb_nnz_idx_t uk, rsb_nnz_idx_t lk, rsb_nnz_idx_t llk, rsb_coo_idx_t mB, rsb_coo_idx_t kB, rsb_coo_idx_t roff, rsb_coo_idx_t coff);
rsb_err_t rsb__get_array_of_leaf_matrices(struct rsb_mtx_t *mtxAp, struct rsb_translated_matrix_t ** tmatricesp, rsb_submatrix_idx_t *countp);
rsb_err_t rsb__fill_array_of_leaf_matrices(const struct rsb_translated_matrix_t *tmatrix, struct rsb_translated_matrix_t *matrices, rsb_submatrix_idx_t * n);
rsb_err_t rsb__sort_array_of_leaf_matrices(const struct rsb_translated_matrix_t *rmatrix,struct rsb_translated_matrix_t *matrices, rsb_submatrix_idx_t n, enum rsb_op_t op);
int rsb__compar_rcsr_matrix_for_spsvl(const void * ap, const void * bp);
size_t rsb__get_index_storage_amount(const struct rsb_mtx_t *mtxAp);
rsb_submatrix_idx_t rsb__get_diagonal_elements_count(const struct rsb_mtx_t *mtxAp);
rsb_submatrix_idx_t rsb__get_diagonal_submatrices_count(const struct rsb_mtx_t *mtxAp);
rsb_err_t rsb__sort_array_of_leaf_matrices_for_ussv(const struct rsb_mtx_t * mtxAp, struct rsb_translated_matrix_t *leaf_matrices, rsb_submatrix_idx_t n, rsb_trans_t transl);
rsb_err_t rsb__leaves_merge(struct rsb_mtx_t * RSB_RESTRICT mtxAp, rsb_submatrix_idx_t manp, rsb_time_t * RSB_RESTRICT stp, rsb_time_t *RSB_RESTRICT atp, rsb_time_t *RSB_RESTRICT ltp, const int wv, int kc);
rsb_err_t rsb__leaves_merge_multiple(struct rsb_mtx_t *mtxAp, rsb_time_t *stp, rsb_time_t *atp, rsb_time_t *ltp, const int wv, int kc);
rsb_err_t rsb__mtx_split(struct rsb_mtx_t * RSB_RESTRICT mtxAp, rsb_submatrix_idx_t manp, rsb_time_t * RSB_RESTRICT stp, rsb_time_t * RSB_RESTRICT atp, rsb_time_t * RSB_RESTRICT ltp, const int wv, int kc);
rsb_err_t rsb__mtx_realloc_with_spare_leaves(struct rsb_mtx_t **mtxApp, rsb_submatrix_idx_t slc);
#endif /* RSB_REC_H_INCLUDED */
/* @endcond */
librsb-1.2.0-rc7/rsb_prec.m4 0000644 0001750 0001750 00000011125 12520763126 012446 0000000 0000000 dnl
dnl
dnl @author: Michele Martone
dnl
ifelse(LIBMMVBR_INCLUDED_PREC_M4,1,`',`
include(`rsb_misc.m4')dnl
include(`do_unroll.m4')dnl
/* @cond INNERDOC */
dnl
/**
* @file
* @brief
* Auxiliary functions.
*/
RSB_M4_HEADER_MESSAGE()dnl
dnl
ifdef(`ONLY_WANT_HEADERS',`
#ifndef RSB_PREC_H_INCLUDED
#define RSB_PREC_H_INCLUDED
')
dnl
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
ifdef(`RSB_M4_WANT_OMP',`dnl
dnl FIXME : this should be moved elsewhere
`#define RSB_WANT_OMP '1
`#define RSB_MAX_OMP_THREADS 'RSB_M4_MAX_OMP_THREADS
ifdef(`ONLY_WANT_HEADERS',`',`dnl
#include /* OpenMP parallelism (EXPERIMENTAL) */
')
')dnl
dnl
#include "rsb_common.h"
dnl #include "rsb_internals.h"
dnl #include "rsb_types.h"
dnl
dnl
dnl
dnl FIXME : COMMENT THIS FILE
dnl -------------------------
dnl
dnl
foreach(`mtype',RSB_M4_TYPES,`dnl
dnl
dnl `rsb_err_t rsb_do_csr_ilu0_'touppercase(RSB_M4_CHOPSPACES(mtype))`(struct rsb_mtx_t * mtxAp)'dnl
`rsb_err_t rsb_do_csr_ilu0_'touppercase(RSB_M4_CHOPSPACES(mtype))`(struct rsb_coo_matrix_t * coop)'dnl
dnl `rsb_err_t rsb_do_csr_ilu0_'touppercase(RSB_M4_CHOPSPACES(mtype))(mtype `*VA, const rsb_coo_idx_t *PA, const rsb_coo_idx_t *JA)'dnl
ifdef(`ONLY_WANT_HEADERS',`;
',`dnl
{
/**
* \ingroup gr_internals
FIXME: INCOMPLETE, EXPERIMENTAL, TEMPORARILY HERE
On exit, the matrix will contain the L and U factors of a pattern preserving incomplete LU factorization (ILU 0).
*/
rsb_err_t errval = RSB_ERR_NO_ERROR;
rsb_coo_idx_t i;
{
mtype *VA = coop->VA;
const rsb_coo_idx_t *PA = coop->IA;
const rsb_coo_idx_t *JA = coop->JA;
dnl const rsb_coo_idx_t *PA = mtxAp->bpntr;
dnl const rsb_coo_idx_t *JA = mtxAp->bindx;
for(i=1;inr;++i)
{
const rsb_nnz_idx_t ifp = PA[i],ilp = PA[i+1],irnz = ilp-ifp;
rsb_nnz_idx_t idp = RSB_MARKER_NNZ_VALUE,ikp = RSB_MARKER_NNZ_VALUE;
if(irnz)
{
idp = rsb__nnz_split_coo_bsearch(JA+ifp,i,irnz)+ifp;
assert(idp<=ilp);
assert(idp>=ifp);
for(ikp=ifp;ikp=JA[ijp] */
for(;JA[kjp]>JA[ijp] && ijpflags & RSB_FLAG_USE_HALFWORD_INDICES) ||
/*mtxAp->typecode != RSB_NUMERICAL_TYPE_DOUBLE || */!rsb__is_square(mtxAp) || rsb__is_symmetric(mtxAp) ||
RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_UNIT_DIAG_IMPLICIT)
)
{
RSB_ERROR(RSB_ERRM_ES);
errval = RSB_ERR_BADARGS;
goto err;
}
if(mtxAp->nr==1)
goto err;
if((errval = rsb__project_rsb_to_coo(mtxAp,&coo))!=RSB_ERR_NO_ERROR)
goto err;
foreach(`mtype',RSB_M4_TYPES,`dnl
`#ifdef 'RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype)
if( mtxAp->typecode == RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) )
return rsb_do_csr_ilu0_`'touppercase(RSB_M4_CHOPSPACES(mtype))(&coo);
else
#endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
')dnl
errval = RSB_ERR_INTERNAL_ERROR;
err:
RSB_DO_ERR_RETURN(errval)
}
')dnl
dnl
dnl
dnl rsb__prec_csr_ilu0: run-time dispatcher over the numerical type of a COO struct.
dnl With ONLY_WANT_HEADERS defined only the prototype is emitted; otherwise the
dnl body below is emitted, with one typecode test per entry of RSB_M4_TYPES, each
dnl guarded by the preprocessor symbol of its numerical type.
dnl Returns whatever the selected rsb_do_csr_ilu0_TYPE routine returns; returns
dnl RSB_ERR_INTERNAL_ERROR when coop->nr is 1 or when no enabled type matches.
dnl NOTE(review): a one-row input is reported as RSB_ERR_INTERNAL_ERROR here;
dnl confirm this is intended rather than a placeholder (see the FIXME in the body).
dnl coop is dereferenced without a NULL check.
dnl
rsb_err_t rsb__prec_csr_ilu0(struct rsb_coo_matrix_t * coop)`'dnl
ifdef(`ONLY_WANT_HEADERS',`;
',`dnl
{
// FIXME: temporary
if(coop->nr==1)
goto err;
foreach(`mtype',RSB_M4_TYPES,`dnl
`#ifdef 'RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype)
if( coop->typecode == RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) )
dnl return rsb_do_csr_ilu0_`'touppercase(RSB_M4_CHOPSPACES(mtype))(coo->VA,coo->IA,coo->JA);
return rsb_do_csr_ilu0_`'touppercase(RSB_M4_CHOPSPACES(mtype))(coop);
else
#endif /* RSB_M4_NUMERICAL_TYPE_PREPROCESSOR_SYMBOL(mtype) */
')dnl
err:
return RSB_ERR_INTERNAL_ERROR;
}
')dnl
dnl
dnl
#ifdef __cplusplus
}
#endif /* __cplusplus */
dnl
dnl
ifdef(`ONLY_WANT_HEADERS',`
#endif /* RSB_PREC_H_INCLUDED */
')
')
dnl
/* @endcond */
dnl
librsb-1.2.0-rc7/rsb_krnl_bcss_l.h 0000644 0001750 0001750 00000002447 13115011302 013705 0000000 0000000 /* @cond INNERDOC */
/*!
@file
@brief
Performance kernels dispatching code, for each type, submatrix size, operation.
But for block compressed sparse stripes format.
Kernels unrolled, with explicit loops, for any blockings.
FIXME : OBSOLETE.
*/
/*
Copyright (C) 2008-2017 Michele Martone
This file is part of librsb.
librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see .
*/
/*
The code in this file was generated automatically by an M4 script.
It is not meant to be used as an API (Application Programming Interface).
p.s.: right now, only row major matrix access is considered.
*/
#ifndef RSB_BCSS_L_H_INCLUDED
#define RSB_BCSS_L_H_INCLUDED
#include "rsb_internals.h"
#endif /* RSB_BCSS_L_H_INCLUDED */
/* @endcond */
librsb-1.2.0-rc7/rsb_pcnt.h 0000644 0001750 0001750 00000003733 12520763126 012376 0000000 0000000 /*
Copyright (C) 2008-2015 Michele Martone
This file is part of librsb.
librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see .
*/
/* @cond INNERDOC */
/**
* @file
* @brief Performance tuning or measuring code.
* @author Michele Martone
* */
#ifndef RSB_PCNT_H_INCLUDED
#define RSB_PCNT_H_INCLUDED
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
#include
#include "rsb_internals.h"
#include "rsb_perf.h"
/*
 * PAPI-only declarations: compiled only when RSB_HAVE_PAPI is defined.
 * The PAPI header supplies long_long and PAPI_MAX_STR_LEN, both used by the
 * declarations in this file.
 */
#ifdef RSB_HAVE_PAPI
#include <papi.h> /* http://icl.cs.utk.edu/papi/ */
typedef long_long rsb_papi_long; /* long_long is a typedef originating in the papi headers */
typedef int rsb_papi_int_t;
typedef int rsb_papi_err_t;
#define RSB_PC_MAX_ITEMS 3 /* capacity of the per-event arrays in struct rsb_pci_t */
#endif /* RSB_HAVE_PAPI */
/*
 * Performance counters information, as passed to rsb_perf_counters_dump().
 * Without RSB_HAVE_PAPI the structure contains eventnum only.
 */
struct rsb_pci_t
{
int eventnum; /* presumably the number of entries in use in the arrays below -- TODO confirm */
#ifdef RSB_HAVE_PAPI
rsb_papi_int_t eventlist[RSB_PC_MAX_ITEMS]; /* one integer per event (presumably PAPI event codes -- confirm) */
rsb_papi_long eventvals[RSB_PC_MAX_ITEMS]; /* one counter value per event */
char eventdesc[RSB_PC_MAX_ITEMS][PAPI_MAX_STR_LEN]; /* one string per event (presumably its description) */
#endif /* RSB_HAVE_PAPI */
};
rsb_err_t rsb_perf_counters_init(void);
rsb_err_t rsb_perf_counters_finalize(void);
rsb_err_t rsb_perf_counters_dump(const rsb_char_t *premsg, const rsb_char_t *postmsg, rsb_int_t tdiv, struct rsb_pci_t *pcip);
rsb_err_t rsb_hc_main(void); /* preliminary */
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* RSB_PCNT_H_INCLUDED */
/* @endcond */
librsb-1.2.0-rc7/rsb_spsum_misc.c 0000644 0001750 0001750 00000025732 13115012674 013606 0000000 0000000 /* @cond INNERDOC */
/*!
* @file
* @author Michele Martone
* @brief
* */
/*
Copyright (C) 2008-2017 Michele Martone
This file is part of librsb.
librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see .
*/
/*
The code in this file was generated automatically by an M4 script.
It is not meant to be used as an API (Application Programming Interface).
p.s.: right now, only row major matrix access is considered.
*/
#include "rsb_common.h"
/*
 * Helper macros private to rsb__do_add_submatrix_to_dense(). They rely on
 * that function's parameters and locals (mtxAp, alphap, Bp, ldb, nr, nc,
 * rowmajor, roff, coff, n, i) plus the VA/IA/JA/PA arrays declared at the
 * expansion site, and are #undef'd right after the function.
 */

/* Adds alpha*VA[n] to the dense element at (ROW+roff, COL+coff), located through ADDRESS. */
#define RSB__ADD_TO_DENSE_AT(TYPE,ADDRESS,ROW,COL) \
	*(TYPE*)(ADDRESS(Bp,ldb,nr,nc,(ROW)+roff,(COL)+coff,mtxAp->el_size))+=(*(const TYPE*)alphap)*(VA[n])

/* Visits every nonzero of a COO-stored matrix; IA and JA must be in scope. */
#define RSB__ADD_COO_TO_DENSE(TYPE) \
	if(rowmajor) \
		for(n=0;RSB_LIKELY(n<mtxAp->nnz);++n) \
			RSB__ADD_TO_DENSE_AT(TYPE,RSB_BLOCK_ROWMAJOR_ADDRESS,IA[n],JA[n]); \
	else \
		for(n=0;RSB_LIKELY(n<mtxAp->nnz);++n) \
			RSB__ADD_TO_DENSE_AT(TYPE,RSB_BLOCK_COLMAJOR_ADDRESS,IA[n],JA[n]);

/*
 * Visits every nonzero of a CSR-stored matrix, row by row; PA and JA must be in scope.
 * NOTE(review): the element row is taken as i+roff -- confirm against the M4 generator.
 */
#define RSB__ADD_CSR_TO_DENSE(TYPE) \
	if(rowmajor) \
	{ \
		for(i=0;RSB_LIKELY(i<mtxAp->nr);++i) \
		for(n=PA[i];RSB_LIKELY(n<PA[i+1]);++n) \
			RSB__ADD_TO_DENSE_AT(TYPE,RSB_BLOCK_ROWMAJOR_ADDRESS,i,JA[n]); \
	} \
	else \
	{ \
		for(i=0;RSB_LIKELY(i<mtxAp->nr);++i) \
		for(n=PA[i];RSB_LIKELY(n<PA[i+1]);++n) \
			RSB__ADD_TO_DENSE_AT(TYPE,RSB_BLOCK_COLMAJOR_ADDRESS,i,JA[n]); \
	}

/* Whole branch for one numerical type: picks COO or CSR, halfword or fullword indices. */
#define RSB__ADD_SUBMATRIX_TO_DENSE_TYPED(TYPE) \
	{ \
		TYPE *VA=mtxAp->VA; \
		if(rsb__is_coo_matrix(mtxAp)) \
		{ \
			if(RSB_DO_FLAG_HAS(mtxAp->flags,RSB_FLAG_USE_HALFWORD_INDICES)) \
			{ \
				RSB_DECLARE_CONST_HALFCOO_ARRAYS_FROM_MATRIX(IA,JA,mtxAp) \
				RSB__ADD_COO_TO_DENSE(TYPE) \
			} \
			else \
			{ \
				RSB_DECLARE_CONST_FULLCOO_ARRAYS_FROM_MATRIX(IA,JA,mtxAp) \
				RSB__ADD_COO_TO_DENSE(TYPE) \
			} \
		} \
		else \
		if(rsb__is_csr_matrix(mtxAp)) \
		{ \
			if(RSB_DO_FLAG_HAS(mtxAp->flags,(RSB_FLAG_USE_HALFWORD_INDICES))) \
			{ \
				RSB_DECLARE_CONST_HALFCSR_ARRAYS_FROM_MATRIX(PA,JA,mtxAp) \
				RSB__ADD_CSR_TO_DENSE(TYPE) \
			} \
			else \
			{ \
				RSB_DECLARE_CONST_FULLCSR_ARRAYS_FROM_MATRIX(PA,JA,mtxAp) \
				RSB__ADD_CSR_TO_DENSE(TYPE) \
			} \
		} \
		else \
			RSB_ERROR(RSB_ERRM_NL); \
	}

rsb_err_t rsb__do_add_submatrix_to_dense(const struct rsb_mtx_t * mtxAp, const void *alphap, void * Bp, rsb_nnz_idx_t ldb, rsb_nnz_idx_t nr, rsb_nnz_idx_t nc, rsb_bool_t rowmajor)
{
	/*!
	 * Accumulates alpha times the nonzeroes of mtxAp into the dense array Bp.
	 *
	 * \param mtxAp    matrix stored as COO or CSR, with halfword or fullword indices
	 * \param alphap   pointer to the scaling factor, of the same numerical type as mtxAp
	 * \param Bp       dense destination; elements are located with
	 *                 RSB_BLOCK_ROWMAJOR_ADDRESS or RSB_BLOCK_COLMAJOR_ADDRESS
	 * \param ldb,nr,nc  passed through unchanged to the address macros
	 * \param rowmajor selects the row major address macro when true, column major otherwise
	 *
	 * Each nonzero lands at its own row/column index plus the offsets
	 * mtxAp->roff / mtxAp->coff.
	 *
	 * \return RSB_ERR_BADARGS if mtxAp, Bp or alphap is NULL;
	 *         RSB_ERR_UNSUPPORTED_TYPE if mtxAp->typecode matches no enabled type;
	 *         RSB_ERR_NO_ERROR otherwise.
	 *
	 * NOTE(review): a matrix that is neither COO nor CSR is only logged through
	 * RSB_ERROR and the function still returns RSB_ERR_NO_ERROR.
	 */
	rsb_nnz_idx_t n, i;
	rsb_err_t errval = RSB_ERR_NO_ERROR;
	rsb_coo_idx_t roff=0, coff=0;

	if(!mtxAp || !Bp || !alphap ) {errval = RSB_ERR_BADARGS; goto err;}
	roff=mtxAp->roff, coff=mtxAp->coff;

#ifdef RSB_NUMERICAL_TYPE_DOUBLE
	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_DOUBLE )
		RSB__ADD_SUBMATRIX_TO_DENSE_TYPED(double)
	else
#endif /* RSB_NUMERICAL_TYPE_DOUBLE */
#ifdef RSB_NUMERICAL_TYPE_FLOAT
	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_FLOAT )
		RSB__ADD_SUBMATRIX_TO_DENSE_TYPED(float)
	else
#endif /* RSB_NUMERICAL_TYPE_FLOAT */
#ifdef RSB_NUMERICAL_TYPE_FLOAT_COMPLEX
	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_FLOAT_COMPLEX )
		RSB__ADD_SUBMATRIX_TO_DENSE_TYPED(float complex)
	else
#endif /* RSB_NUMERICAL_TYPE_FLOAT_COMPLEX */
#ifdef RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX
	if( mtxAp->typecode == RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX )
		RSB__ADD_SUBMATRIX_TO_DENSE_TYPED(double complex)
	else
#endif /* RSB_NUMERICAL_TYPE_DOUBLE_COMPLEX */
		errval = RSB_ERR_UNSUPPORTED_TYPE;
err:
	/* report the specific error recorded above, not a generic one */
	return errval;
}

#undef RSB__ADD_SUBMATRIX_TO_DENSE_TYPED
#undef RSB__ADD_CSR_TO_DENSE
#undef RSB__ADD_COO_TO_DENSE
#undef RSB__ADD_TO_DENSE_AT
/* @endcond */
librsb-1.2.0-rc7/rsb_coo_symm.c 0000644 0001750 0001750 00000005154 12520763126 013251 0000000 0000000 /*
Copyright (C) 2008-2015 Michele Martone
This file is part of librsb.
librsb is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
librsb is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public
License along with librsb; see the file COPYING.
If not, see .
*/
/* @cond INNERDOC */
/*!
* @file
* @author Michele Martone
* @brief
* This source file contains functions for COO symmetry handling.
* */
#include "rsb_internals.h"
rsb_err_t rsb__reallocate_with_symmetry( rsb_coo_idx_t ** IA, rsb_coo_idx_t ** JA, void **coo, rsb_nnz_idx_t * nnz, rsb_type_t typecode )
{
/*!
* \ingroup gr_internals
* Assuming that for a symmetric matrix we are given
* these arrays containing (i,j) pairs, with no (j,i) pairs at all (except j=i),
* we reallocate arrays (if possible) and fill them with the symmetric elements
* with no duplicate.
*
* note : this is a slow service/debug function, not a high performance one.
* */
rsb_coo_idx_t * new_IA, *new_JA;
void * ncoo;
rsb_nnz_idx_t nnnz;
size_t i,odel = 0, el_size = RSB_NUMERICAL_TYPE_SIZE(typecode);/* off diagonal elements */
rsb_err_t errval = RSB_ERR_NO_ERROR;
if(!IA || !JA || !*IA || !*JA || !nnz || !*nnz || !el_size)
return RSB_ERR_BADARGS;
nnnz = *nnz;
for(i=0;i