diff --git a/include/atoms/affine.h b/include/atoms/affine.h index 9527d3c..49b4637 100644 --- a/include/atoms/affine.h +++ b/include/atoms/affine.h @@ -19,8 +19,7 @@ #define AFFINE_H #include "expr.h" -#include "subexpr.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" expr *new_add(expr *left, expr *right); expr *new_neg(expr *child); @@ -45,7 +44,7 @@ expr *new_transpose(expr *child); /* Left matrix multiplication: A @ f(x) where A is a constant sparse matrix. param_node is NULL for fixed constants. We currently do not support sparse parameters, so param_node should always be null. */ -expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A); +expr *new_left_matmul(expr *param_node, expr *u, const CSR_matrix *A); /* Left matrix multiplication: A @ f(x) where A is a constant dense matrix (in row-major, m x n, with values given by 'data') or a parameter @@ -59,7 +58,7 @@ expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n, /* Right matrix multiplication: f(x) @ A where A is a constant sparse matrix. We currently do not support sparse parameters, so param_node should always be null. 
*/ -expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A); +expr *new_right_matmul(expr *param_node, expr *u, const CSR_matrix *A); /* Right matrix multiplication: f(x) @ A where A is a constant dense matrix (in row-major, m x n, with values given by 'data') or a parameter diff --git a/include/atoms/bivariate_full_dom.h b/include/atoms/bivariate_full_dom.h index a864c7b..44e3f01 100644 --- a/include/atoms/bivariate_full_dom.h +++ b/include/atoms/bivariate_full_dom.h @@ -22,7 +22,7 @@ expr *new_elementwise_mult(expr *left, expr *right); -/* Matrix multiplication: Z = X @ Y */ +/* matrix multiplication: Z = X @ Y */ expr *new_matmul(expr *x, expr *y); #endif /* BIVARIATE_FULL_DOM_H */ diff --git a/include/atoms/non_elementwise_full_dom.h b/include/atoms/non_elementwise_full_dom.h index 7de046c..65c6070 100644 --- a/include/atoms/non_elementwise_full_dom.h +++ b/include/atoms/non_elementwise_full_dom.h @@ -20,9 +20,9 @@ #include "expr.h" #include "subexpr.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" -expr *new_quad_form(expr *child, CSR_Matrix *Q); +expr *new_quad_form(expr *child, CSR_matrix *Q); /* product of all entries, without axis argument */ expr *new_prod(expr *child); diff --git a/include/expr.h b/include/expr.h index 1c54b2e..fbe5a67 100644 --- a/include/expr.h +++ b/include/expr.h @@ -18,8 +18,9 @@ #ifndef EXPR_H #define EXPR_H -#include "utils/CSC_Matrix.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" +#include "utils/matrix.h" #include #include #include @@ -44,16 +45,16 @@ typedef struct { double *dwork; int *iwork; - CSC_Matrix *jacobian_csc; - int *csc_work; /* for CSR-CSC conversion */ + CSC_matrix *jacobian_csc; + int *csc_work; /* for CSR_matrix-CSC_matrix conversion */ /* jacobian_csc_filled is only used for affine functions to avoid redundant conversions. Could become relevant for non-affine functions if we start supporting common subexpressions on the Python side. 
*/ bool jacobian_csc_filled; double *local_jac_diag; /* cached f'(g(x)) diagonal */ - CSR_Matrix *hess_term1; /* Jg^T D Jg workspace */ - CSR_Matrix *hess_term2; /* child wsum_hess workspace */ + matrix *hess_term1; /* Jg^T D Jg workspace */ + matrix *hess_term2; /* child wsum_hess workspace */ } Expr_Work; /* Base expression node structure */ @@ -70,8 +71,8 @@ typedef struct expr // oracle related quantities // ------------------------------------------------------------------------ double *value; - CSR_Matrix *jacobian; - CSR_Matrix *wsum_hess; + matrix *jacobian; + matrix *wsum_hess; forward_fn forward; jacobian_init_fn jacobian_init_impl; wsum_hess_init_fn wsum_hess_init_impl; @@ -110,7 +111,7 @@ void free_expr(expr *node); void jacobian_init(expr *node); void wsum_hess_init(expr *node); -/* Initialize CSC form of the Jacobian from the CSR Jacobian. +/* Initialize CSC_matrix form of the Jacobian from the CSR_matrix Jacobian. * Must be called after jacobian_init. */ void jacobian_csc_init(expr *node); diff --git a/include/old-code/old_CSR.h b/include/old-code/old_CSR.h index 41af333..7033246 100644 --- a/include/old-code/old_CSR.h +++ b/include/old-code/old_CSR.h @@ -18,29 +18,29 @@ #ifndef OLD_CSR_H #define OLD_CSR_H -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" /* Build (I_p kron A) = blkdiag(A, A, ..., A) of size (p*A->m) x (p*A->n) */ -CSR_Matrix *block_diag_repeat_csr(const CSR_Matrix *A, int p); +CSR_matrix *block_diag_repeat_csr(const CSR_matrix *A, int p); /* Build (A kron I_p) of size (A->m * p) x (A->n * p) with nnz = A->nnz * p. 
*/ -CSR_matrix *kron_identity_csr(const CSR_matrix *A, int p); /* Computes values of the row matrix C = z^T A (column indices must have been pre-computed) and transposed matrix AT must be provided) */ -void Ax_csr_fill_values(const CSR_Matrix *AT, const double *z, CSR_Matrix *C); +void Ax_csr_fill_values(const CSR_matrix *AT, const double *z, CSR_matrix *C); -/* Insert value into CSR matrix A with just one row at col_idx. Assumes that A +/* Insert value into CSR matrix A with just one row at col_idx. Assumes that A has enough space and that A does not have an element at col_idx. It does update nnz. */ -void csr_insert_value(CSR_Matrix *A, int col_idx, double value); +void csr_insert_value(CSR_matrix *A, int col_idx, double value); -/* Compute C = diag(d) * A where d is an array and A, C are CSR matrices +/* Compute C = diag(d) * A where d is an array and A, C are CSR matrices * d must have length m * C must be pre-allocated with same dimensions as A */ -void diag_csr_mult(const double *d, const CSR_Matrix *A, CSR_Matrix *C); +void diag_csr_mult(const double *d, const CSR_matrix *A, CSR_matrix *C); /* y = Ax, where y is returned as dense (no column offset) */ -void Ax_csr_wo_offset(const CSR_Matrix *A, const double *x, double *y); +void Ax_csr_wo_offset(const CSR_matrix *A, const double *x, double *y); #endif /* OLD_CSR_H */ diff --git a/include/old-code/old_CSR_sum.h b/include/old-code/old_CSR_sum.h index 72d0b30..3582f31 100644 --- a/include/old-code/old_CSR_sum.h +++ b/include/old-code/old_CSR_sum.h @@ -18,44 +18,44 @@ #ifndef OLD_CSR_SUM_H #define OLD_CSR_SUM_H -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" -/* Compute C = A + B where A, B, C are CSR matrices +/* Compute C = A + B where A, B, C are CSR matrices * A and B must have same dimensions * C must be pre-allocated with sufficient nnz capacity.
* C must be different from A and B */ -void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C); +void sum_csr_matrices(const CSR_matrix *A, const CSR_matrix *B, CSR_matrix *C); -/* Compute C = diag(d1) * A + diag(d2) * B where A, B, C are CSR matrices */ -void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C, +/* Compute C = diag(d1) * A + diag(d2) * B where A, B, C are CSR_matrix matrices */ +void sum_scaled_csr_matrices(const CSR_matrix *A, const CSR_matrix *B, CSR_matrix *C, const double *d1, const double *d2); /* forward declaration */ struct int_double_pair; /* Sum all rows of A into a single row matrix C */ -void sum_all_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, +void sum_all_rows_csr(const CSR_matrix *A, CSR_matrix *C, struct int_double_pair *pairs); /* Sum blocks of rows of A into a matrix C */ -void sum_block_of_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, +void sum_block_of_rows_csr(const CSR_matrix *A, CSR_matrix *C, struct int_double_pair *pairs, int row_block_size); /* Sum evenly spaced rows of A into a matrix C */ -void sum_evenly_spaced_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, +void sum_evenly_spaced_rows_csr(const CSR_matrix *A, CSR_matrix *C, struct int_double_pair *pairs, int row_spacing); /* Sum evenly spaced rows of A starting at offset into a row matrix C */ -void sum_spaced_rows_into_row_csr(const CSR_Matrix *A, CSR_Matrix *C, +void sum_spaced_rows_into_row_csr(const CSR_matrix *A, CSR_matrix *C, struct int_double_pair *pairs, int offset, int spacing); /* Fill values of summed rows using precomputed idx_map and sparsity of C */ -void sum_all_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, +void sum_all_rows_csr_fill_values(const CSR_matrix *A, CSR_matrix *C, const int *idx_map); /* Fill values of summed block rows using precomputed idx_map */ -void sum_block_of_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C, +void sum_block_of_rows_csr_fill_values(const CSR_matrix 
*A, CSR_matrix *C, const int *idx_map); #endif /* OLD_CSR_SUM_H */ diff --git a/include/old-code/old_affine.h b/include/old-code/old_affine.h index 3b08491..cf0e598 100644 --- a/include/old-code/old_affine.h +++ b/include/old-code/old_affine.h @@ -19,8 +19,8 @@ #define OLD_AFFINE_H #include "expr.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" -expr *new_linear(expr *u, const CSR_Matrix *A, const double *b); +expr *new_linear(expr *u, const CSR_matrix *A, const double *b); #endif /* OLD_AFFINE_H */ diff --git a/include/old-code/old_permuted_dense.h b/include/old-code/old_permuted_dense.h new file mode 100644 index 0000000..38ac5cf --- /dev/null +++ b/include/old-code/old_permuted_dense.h @@ -0,0 +1,76 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the SparseDiffEngine project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef OLD_PERMUTED_DENSE_H +#define OLD_PERMUTED_DENSE_H + +#include "utils/CSR_matrix.h" +#include "utils/permuted_dense.h" + +/* Legacy CSR-based (PD, Sparse) BTA / BTDA kernels. + + Mathematically equivalent to BTA_pd_csc_alloc / BTDA_pd_csc_fill_values + in src/utils/permuted_dense.c — they all compute C = B^T (diag(d)) A + for B PD and A sparse. The matrix_BTA dispatcher used to choose between + the CSR-here and CSC-in-utils variants; after a benchmark on + trimmed_log_reg-shaped workloads we committed to CSC and moved these + kernels out of production paths. 
+ + Kept here as a reference implementation, as cross-comparison fodder for + tests (test_BTA_pd_csc_matches_csr), and as the CSR side of the + profile_BTA_pd_csr_vs_csc microbenchmark. */ + +/* Allocate a new permuted_dense for C = B^T A where B is PD and A is + CSR-sparse. Output is PD with row_perm = B->col_perm and col_perm = the + sorted union of columns appearing in A's rows at positions row_perm_B. + Dense block size = (B->n0, |col_active|). Values uninitialized. */ +matrix *BTA_pd_csr_alloc(const permuted_dense *B, const CSR_matrix *A); + +/* Fill C->X = X_B^T @ A_sub_dense, where A_sub_dense is A's rows at + positions row_perm_B, columns restricted to C's col_perm, scattered to a + dense buffer. C must have the structure produced by BTA_pd_csr_alloc. */ +void BTA_pd_csr_fill_values(const permuted_dense *B, const CSR_matrix *A, + permuted_dense *C); + +/* BTDA variant: C->X = X_B^T diag(d) A_sub_dense. d may be NULL (treated + as identity scaling). C must have the structure produced by + BTA_pd_csr_alloc. */ +void BTDA_pd_csr_fill_values(const permuted_dense *B, const double *d, + const CSR_matrix *A, permuted_dense *C); + +/* Legacy CSR-pd kernels (B=CSR, A=PD), formerly in src/utils/permuted_dense.c. + Production now dispatches the (PD A, sparse B) branch through CSC-pd + kernels (BTA_csc_pd_alloc / BTDA_csc_pd_fill_values in utils/permuted_dense.h), + so these CSR variants live here as reference implementations and as + targets for the direct unit tests in tests/old-code. */ + +/* Allocate a new permuted_dense for C = B^T A where B is CSR-sparse and A + is PD. Output is PD with row_perm = the sorted union of columns appearing + in B's rows at positions row_perm_A, and col_perm = A->col_perm. */ +matrix *BTA_csr_pd_alloc(const CSR_matrix *B_csr, const permuted_dense *A); + +/* No-d BTA fill. C must have the structure produced by BTA_csr_pd_alloc. 
*/ +void BTA_csr_pd_fill_values(const CSR_matrix *B_csr, const permuted_dense *A, + permuted_dense *C); + +/* BTDA variant: C->X = B_sub_dense^T diag(d) X_A. d may be NULL (treated + as identity scaling). C must have the structure produced by + BTA_csr_pd_alloc. */ +void BTDA_csr_pd_fill_values(const CSR_matrix *B_csr, const double *d, + const permuted_dense *A, permuted_dense *C); + +#endif /* OLD_PERMUTED_DENSE_H */ diff --git a/include/problem.h b/include/problem.h index 7e68a28..530907c 100644 --- a/include/problem.h +++ b/include/problem.h @@ -19,8 +19,8 @@ #define PROBLEM_H #include "expr.h" -#include "utils/COO_Matrix.h" -#include "utils/CSR_Matrix.h" +#include "utils/COO_matrix.h" +#include "utils/CSR_matrix.h" #include "utils/Timer.h" #include @@ -59,11 +59,11 @@ typedef struct problem double *gradient_values; /* allocated by problem_init_derivatives */ - CSR_Matrix *jacobian; - CSR_Matrix *lagrange_hessian; + CSR_matrix *jacobian; + CSR_matrix *lagrange_hessian; int *hess_idx_map; /* maps all wsum_hess nnz to lagrange_hessian */ - COO_Matrix *jacobian_coo; - COO_Matrix *lagrange_hessian_coo; /* lower triangular part stored in COO */ + COO_matrix *jacobian_coo; + COO_matrix *lagrange_hessian_coo; /* lower triangular part stored in COO */ /* for the affine shortcut we keep track of the first time the jacobian and * hessian are called */ diff --git a/include/subexpr.h b/include/subexpr.h index f97feef..0dd6c8e 100644 --- a/include/subexpr.h +++ b/include/subexpr.h @@ -19,8 +19,8 @@ #define SUBEXPR_H #include "expr.h" -#include "utils/CSC_Matrix.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" #include "utils/matrix.h" /* Forward declaration */ @@ -40,7 +40,7 @@ typedef struct parameter_expr } parameter_expr; /* Linear operator: y = A * x + b - * The matrix A is stored as node->jacobian (CSR). */ + * The matrix A is stored as node->jacobian (CSR_matrix). 
*/ typedef struct linear_op_expr { expr base; @@ -58,8 +58,8 @@ typedef struct power_expr typedef struct quad_form_expr { expr base; - CSR_Matrix *Q; - CSC_Matrix *QJf; /* Q * J_f in CSC (for chain rule hessian) */ + CSR_matrix *Q; + CSC_matrix *QJf; /* Q * J_f in CSC_matrix (for chain rule hessian) */ } quad_form_expr; /* Sum reduction along an axis */ @@ -102,19 +102,19 @@ typedef struct hstack_expr expr base; expr **args; int n_args; - CSR_Matrix *CSR_work; /* for summing Hessians of children */ + CSR_matrix *CSR_work; /* for summing Hessians of children */ } hstack_expr; /* Elementwise multiplication */ typedef struct elementwise_mult_expr { expr base; - CSR_Matrix *CSR_work1; /* C = Jg2^T diag(w) Jg1 */ - CSR_Matrix *CSR_work2; /* CT = C^T */ - int *idx_map_C; /* C[j] -> wsum_hess pos */ - int *idx_map_CT; /* CT[j] -> wsum_hess pos */ - int *idx_map_Hx; /* x->wsum_hess[j] -> pos */ - int *idx_map_Hy; /* y->wsum_hess[j] -> pos */ + matrix *C; /* C = Jg2^T diag(w) Jg1 (Sparse or PD) */ + matrix *CT; /* CT = C^T; same concrete type as C */ + int *idx_map_C; /* C[j] -> wsum_hess pos */ + int *idx_map_CT; /* CT[j] -> wsum_hess pos */ + int *idx_map_Hx; /* x->wsum_hess[j] -> pos */ + int *idx_map_Hy; /* y->wsum_hess[j] -> pos */ } elementwise_mult_expr; /* Left matrix multiplication: y = A * f(x) where f(x) is an expression. Note that @@ -123,11 +123,11 @@ important distinction compared to linear_op_expr. */ typedef struct left_matmul_expr { expr base; - Matrix *A; - Matrix *AT; + matrix *A; + matrix *AT; int n_blocks; - CSC_Matrix *Jchild_CSC; - CSC_Matrix *J_CSC; + CSC_matrix *Jchild_CSC; + CSC_matrix *J_CSC; int *csc_to_csr_work; expr *param_source; void (*refresh_param_values)(struct left_matmul_expr *); @@ -151,7 +151,7 @@ typedef struct vector_mult_expr /* 1D convolution: y = conv(a, child) where a is a length-m kernel held by * param_source. Output has size (m + n - 1) where n is the child length. 
* Forward and wsum_hess backprop are computed as direct loops; for Jacobian - * we materialize T(a) as a CSR once at jacobian_init and reuse the engine's + * we materialize T(a) as a CSR_matrix once at jacobian_init and reuse the engine's * block-left-mult machinery for composite children. */ typedef struct convolve_expr { @@ -159,8 +159,8 @@ typedef struct convolve_expr expr *param_source; /* length-m kernel */ int m; /* kernel length */ int n; /* input length */ - CSR_Matrix *T; /* (m+n-1) x n convolution matrix */ - CSC_Matrix *Jchild_CSC; + CSR_matrix *T; /* (m+n-1) x n convolution matrix */ + CSC_matrix *Jchild_CSC; } convolve_expr; /* Bivariate matrix multiplication: Z = f(u) @ g(u) where both children @@ -169,16 +169,16 @@ typedef struct matmul_expr { expr base; /* Jacobian workspace */ - CSR_Matrix *term1_CSR; /* (Y^T x I_m) @ J_f */ - CSR_Matrix *term2_CSR; /* (I_n x X) @ J_g */ + CSR_matrix *term1_CSR; /* (Y^T x I_m) @ J_f */ + CSR_matrix *term2_CSR; /* (I_n x X) @ J_g */ /* Hessian workspace (composite only) */ - CSR_Matrix *B; /* cross-Hessian B(w), mk x kn */ - CSR_Matrix *BJg; /* B @ J_g */ - CSC_Matrix *BJg_CSC; /* BJg in CSC */ - int *BJg_csc_work; /* CSR-to-CSC workspace */ - CSR_Matrix *C; /* J_f^T @ B @ J_g */ - CSR_Matrix *CT; /* C^T */ + CSR_matrix *B; /* cross-Hessian B(w), mk x kn */ + CSR_matrix *BJg; /* B @ J_g */ + CSC_matrix *BJg_CSC; /* BJg in CSC_matrix */ + int *BJg_csc_work; /* CSR_matrix-to-CSC_matrix workspace */ + CSR_matrix *C; /* J_f^T @ B @ J_g */ + CSR_matrix *CT; /* C^T */ int *idx_map_C; int *idx_map_CT; int *idx_map_Hf; @@ -194,14 +194,6 @@ typedef struct index_expr bool has_duplicates; /* True if indices have duplicates (affects Hessian path) */ } index_expr; -/* Broadcast types */ -typedef enum -{ - BROADCAST_ROW, /* (1, n) -> (m, n) */ - BROADCAST_COL, /* (m, 1) -> (m, n) */ - BROADCAST_SCALAR /* (1, 1) -> (m, n) */ -} broadcast_type; - typedef struct broadcast_expr { expr base; diff --git a/include/utils/COO_Matrix.h 
b/include/utils/COO_matrix.h similarity index 57% rename from include/utils/COO_Matrix.h rename to include/utils/COO_matrix.h index 51a39e9..64b57b4 100644 --- a/include/utils/COO_Matrix.h +++ b/include/utils/COO_matrix.h @@ -15,24 +15,24 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef COO_MATRIX_H -#define COO_MATRIX_H +#ifndef COO_matrix_H +#define COO_matrix_H -#include "CSR_Matrix.h" +#include "CSR_matrix.h" -/* COO (Coordinate) Sparse Matrix Format +/* COO (Coordinate) Sparse matrix Format * * For an m x n matrix with nnz nonzeros: * - rows: array of size nnz containing row indices * - cols: array of size nnz containing column indices * - x: array of size nnz containing values - * - value_map: array of size nnz mapping CSR entries to COO entries (for + * - value_map: array of size nnz mapping CSR_matrix entries to COO entries (for * lower-triangular COO) * - m: number of rows * - n: number of columns * - nnz: number of nonzero entries */ -typedef struct COO_Matrix +typedef struct COO_matrix { int *rows; int *cols; @@ -41,20 +41,19 @@ typedef struct COO_Matrix int m; int n; int nnz; -} COO_Matrix; +} COO_matrix; -/* Construct a COO matrix from a CSR matrix */ -COO_Matrix *new_coo_matrix(const CSR_Matrix *A); +/* COO from CSR */ +COO_matrix *new_COO_matrix(const CSR_matrix *A); -/* Construct a COO matrix containing only the lower-triangular - * entries (col <= row) of a symmetric CSR matrix. Populates - * value_map so that refresh_lower_triangular_coo can update - * values without recomputing structure. */ -COO_Matrix *new_coo_matrix_lower_triangular(const CSR_Matrix *A); +/* Construct COO containing only the lower-triangular entries (col <= row) of a + symmetric CSR. Populates value_map so that refresh_lower_triangular_coo can + update values without recomputing structure. 
*/ +COO_matrix *new_COO_matrix_lower_triangular(const CSR_matrix *A); -/* Refresh COO values from a new CSR value array using value_map */ -void refresh_lower_triangular_coo(COO_Matrix *coo, const double *vals); +/* Refresh COO values from a new CSR_matrix value array using value_map */ +void refresh_lower_triangular_coo(COO_matrix *coo, const double *vals); -void free_coo_matrix(COO_Matrix *matrix); +void free_COO_matrix(COO_matrix *matrix); -#endif /* COO_MATRIX_H */ +#endif /* COO_matrix_H */ diff --git a/include/utils/CSC_Matrix.h b/include/utils/CSC_matrix.h similarity index 56% rename from include/utils/CSC_Matrix.h rename to include/utils/CSC_matrix.h index f5236f6..34d3155 100644 --- a/include/utils/CSC_Matrix.h +++ b/include/utils/CSC_matrix.h @@ -15,12 +15,12 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef CSC_MATRIX_H -#define CSC_MATRIX_H +#ifndef CSC_matrix_H +#define CSC_matrix_H -#include "CSR_Matrix.h" +#include "CSR_matrix.h" -/* CSC (Compressed Sparse Column) Matrix Format +/* CSC_matrix (Compressed Sparse Column) matrix Format * * For an m x n matrix with nnz nonzeros: * - p: array of size (n + 1) indicating start of each column @@ -30,7 +30,7 @@ * - n: number of columns * - nnz: number of nonzero entries */ -typedef struct CSC_Matrix +typedef struct CSC_matrix { int *p; int *i; @@ -38,43 +38,43 @@ typedef struct CSC_Matrix int m; int n; int nnz; -} CSC_Matrix; +} CSC_matrix; /* constructor and destructor */ -CSC_Matrix *new_csc_matrix(int m, int n, int nnz); -void free_csc_matrix(CSC_Matrix *matrix); +CSC_matrix *new_CSC_matrix(int m, int n, int nnz); +void free_CSC_matrix(CSC_matrix *matrix); /* Fill sparsity of C = A^T D A for diagonal D */ -CSR_Matrix *ATA_alloc(const CSC_Matrix *A); +CSR_matrix *ATA_alloc(const CSC_matrix *A); /* Fill sparsity of C = B^T D A for diagonal D */ -CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B); +CSR_matrix *BTA_alloc(const 
CSC_matrix *A, const CSC_matrix *B); /* Fill sparsity of C = BA, where B is symmetric. */ -CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A); +CSC_matrix *symBA_alloc(const CSR_matrix *B, const CSC_matrix *A); /* Compute values for C = A^T D A (null d corresponds to D as identity) */ -void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C); +void ATDA_fill_values(const CSC_matrix *A, const double *d, CSR_matrix *C); /* Compute values for C = B^T D A (null d corresonds to D as identity) */ -void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d, - CSR_Matrix *C); +void BTDA_fill_values(const CSC_matrix *A, const CSC_matrix *B, const double *d, + CSR_matrix *C); /* Fill values of C = BA. The matrix B does not have to be symmetric */ -void BA_fill_values(const CSR_Matrix *B, const CSC_Matrix *A, CSC_Matrix *C); +void BA_fill_values(const CSR_matrix *B, const CSC_matrix *A, CSC_matrix *C); /* Fill values of C = x^T A. The matrix C must have filled sparsity. 
*/ -void yTA_fill_values(const CSC_Matrix *A, const double *x, CSR_Matrix *C); +void yTA_fill_values(const CSC_matrix *A, const double *x, CSR_matrix *C); -/* Count nonzero columns of a CSC matrix */ -int count_nonzero_cols_csc(const CSC_Matrix *A); +/* Count nonzero columns of a CSC matrix */ +int count_nonzero_cols_csc(const CSC_matrix *A); -/* convert from CSR to CSC format */ -CSC_Matrix *csr_to_csc_alloc(const CSR_Matrix *A, int *iwork); -void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork); +/* convert from CSR_matrix to CSC_matrix format */ +CSC_matrix *csr_to_csc_alloc(const CSR_matrix *A, int *iwork); +void csr_to_csc_fill_values(const CSR_matrix *A, CSC_matrix *C, int *iwork); -/* convert from CSC to CSR format */ -CSR_Matrix *csc_to_csr_alloc(const CSC_Matrix *A, int *iwork); -void csc_to_csr_fill_values(const CSC_Matrix *A, CSR_Matrix *C, int *iwork); +/* convert from CSC_matrix to CSR_matrix format */ +CSR_matrix *csc_to_csr_alloc(const CSC_matrix *A, int *iwork); +void csc_to_csr_fill_values(const CSC_matrix *A, CSR_matrix *C, int *iwork); -#endif /* CSC_MATRIX_H */ +#endif /* CSC_matrix_H */ diff --git a/include/utils/CSR_Matrix.h b/include/utils/CSR_matrix.h similarity index 62% rename from include/utils/CSR_Matrix.h rename to include/utils/CSR_matrix.h index dd5ad91..a4880c9 100644 --- a/include/utils/CSR_Matrix.h +++ b/include/utils/CSR_matrix.h @@ -15,11 +15,11 @@ * See the License for the specific language governing permissions and * limitations under the License.
*/ -#ifndef CSR_MATRIX_H -#define CSR_MATRIX_H +#ifndef CSR_matrix_H +#define CSR_matrix_H #include -/* CSR (Compressed Sparse Row) Matrix Format +/* CSR_matrix (Compressed Sparse Row) matrix Format * * For an m x n matrix with nnz nonzeros: * - p: array of size (m + 1) indicating start of each row @@ -29,7 +29,7 @@ * - n: number of columns * - nnz: number of nonzero entries */ -typedef struct CSR_Matrix +typedef struct CSR_matrix { int *p; int *i; @@ -37,37 +37,37 @@ typedef struct CSR_Matrix int m; int n; int nnz; -} CSR_Matrix; +} CSR_matrix; /* constructors and destructors */ -CSR_Matrix *new_csr_matrix(int m, int n, int nnz); -CSR_Matrix *new_csr(const CSR_Matrix *A); -CSR_Matrix *new_csr_copy_sparsity(const CSR_Matrix *A); -void free_csr_matrix(CSR_Matrix *matrix); -void copy_csr_matrix(const CSR_Matrix *A, CSR_Matrix *C); +CSR_matrix *new_CSR_matrix(int m, int n, int nnz); +CSR_matrix *new_csr(const CSR_matrix *A); +CSR_matrix *new_csr_copy_sparsity(const CSR_matrix *A); +void free_CSR_matrix(CSR_matrix *matrix); +void copy_CSR_matrix(const CSR_matrix *A, CSR_matrix *C); /* transpose functionality (iwork must be of size A->n) */ -CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork); -CSR_Matrix *AT_alloc(const CSR_Matrix *A, int *iwork); -void AT_fill_values(const CSR_Matrix *A, CSR_Matrix *AT, int *iwork); +CSR_matrix *transpose(const CSR_matrix *A, int *iwork); +CSR_matrix *AT_alloc(const CSR_matrix *A, int *iwork); +void AT_fill_values(const CSR_matrix *A, CSR_matrix *AT, int *iwork); /* computes dense y = Ax */ -void Ax_csr(const CSR_Matrix *A, const double *x, double *y, int col_offset); +void Ax_csr(const CSR_matrix *A, const double *x, double *y, int col_offset); /* fills values of C = diag(d) @ A */ -void DA_fill_values(const double *d, const CSR_Matrix *A, CSR_Matrix *C); +void DA_fill_values(const double *d, const CSR_matrix *A, CSR_matrix *C); /* Count number of columns with nonzero entries in A and marks them in col_nz */ -int 
count_nonzero_cols(const CSR_Matrix *A, bool *col_nz); +int count_nonzero_cols(const CSR_matrix *A, bool *col_nz); /* inserts 'idx' into array 'arr' in sorted order, and moves the other elements */ void insert_idx(int idx, int *arr, int len); /* get value at position (row, col) in A */ -double csr_get_value(const CSR_Matrix *A, int row, int col); +double csr_get_value(const CSR_matrix *A, int row, int col); -/* Expand symmetric CSR matrix A to full matrix C. A is assumed to store +/* Expand symmetric CSR matrix A to full matrix C. A is assumed to store only upper triangle. C must be pre-allocated with sufficient nnz */ -void symmetrize_csr(const int *Ap, const int *Ai, int m, CSR_Matrix *C); +void symmetrize_csr(const int *Ap, const int *Ai, int m, CSR_matrix *C); -#endif /* CSR_MATRIX_H */ +#endif /* CSR_matrix_H */ diff --git a/include/utils/CSR_sum.h b/include/utils/CSR_sum.h index f3ba700..62f49d9 100644 --- a/include/utils/CSR_sum.h +++ b/include/utils/CSR_sum.h @@ -18,60 +18,61 @@ #ifndef CSR_SUM_H #define CSR_SUM_H -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" /* forward declaration */ struct int_double_pair; /* Compute sparsity pattern of C = A + B (and sets C->nnz) */ -void sum_csr_alloc(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C); +void sum_csr_alloc(const CSR_matrix *A, const CSR_matrix *B, CSR_matrix *C); /* Fills values of C = A + B (assuming C's sparsity pattern is set) */ -void sum_csr_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C); +void sum_csr_fill_values(const CSR_matrix *A, const CSR_matrix *B, CSR_matrix *C); /* Fills values of C = diag(d1) * A + diag(d2) * B (assuming C's sparsity is set)*/ -void sum_scaled_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, - CSR_Matrix *C, const double *d1, +void sum_scaled_csr_matrices_fill_values(const CSR_matrix *A, const CSR_matrix *B, + CSR_matrix *C, const double *d1, const double *d2); -/* The following five functions are used
for summing either more than two CSR - matrices or rows of CSR matrices. To implement the filling of values efficiently, - we compute an idx_map when we fill the sparsity pattern of the output matrix, - which maps each nonzero entry in the input matrix to its position in the output - matrix. This allows us to fill the values with a single pass of the output matrix - through the input matrices, without needing to search for the position of each - entry in the output matrix. So each idx_map should have size equal to the number - of nonzeros in the corresponding input matrix, and idx_map[j] should give the - index in the output matrix of the entry (in the value array of the output matrix) - corresponding to the j-th nonzero in the input matrix. +/* The following five functions are used for summing either more than two CSR_matrix + matrices or rows of CSR_matrix matrices. To implement the filling of values + efficiently, we compute an idx_map when we fill the sparsity pattern of the output + matrix, which maps each nonzero entry in the input matrix to its position in the + output matrix. This allows us to fill the values with a single pass of the output + matrix through the input matrices, without needing to search for the position of + each entry in the output matrix. So each idx_map should have size equal to the + number of nonzeros in the corresponding input matrix, and idx_map[j] should give + the index in the output matrix of the entry (in the value array of the output + matrix) corresponding to the j-th nonzero in the input matrix. Output matrix C, input matrix A, iwork->size = max(A->n, A->nnz) for the first four functions. The last function allocates the output matrix and returns it. 
*/ // ------------------------------------------------------------------------------------ -void sum_all_rows_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, int *iwork, +void sum_all_rows_csr_alloc(const CSR_matrix *A, CSR_matrix *C, int *iwork, int *idx_map); -void sum_block_of_rows_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, +void sum_block_of_rows_csr_alloc(const CSR_matrix *A, CSR_matrix *C, int row_block_size, int *iwork, int *idx_map); -void sum_evenly_spaced_rows_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, +void sum_evenly_spaced_rows_csr_alloc(const CSR_matrix *A, CSR_matrix *C, int row_spacing, int *iwork, int *idx_map); -void sum_spaced_rows_into_row_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, +void sum_spaced_rows_into_row_csr_alloc(const CSR_matrix *A, CSR_matrix *C, int spacing, int *iwork, int *idx_map); /* Compute sparsity pattern of out = A + B + C + D */ -CSR_Matrix *sum_4_csr_alloc(const CSR_Matrix *A, const CSR_Matrix *B, - const CSR_Matrix *C, const CSR_Matrix *D, +CSR_matrix *sum_4_csr_alloc(const CSR_matrix *A, const CSR_matrix *B, + const CSR_matrix *C, const CSR_matrix *D, int *idx_maps[4]); // ------------------------------------------------------------------------------------ -/* Accumulates values from A according to map. Must memset to zero before calling. */ -void accumulator(const CSR_Matrix *A, const int *idx_map, double *out); +/* Accumulates `nnz` values from `vals` into `out` at the positions given by + `idx_map` (length `nnz`). Caller must zero `out` before calling. */ +void accumulator(const double *vals, int nnz, const int *idx_map, double *out); /* Accumulates values from A according to map with spacing. Must memset to zero * before calling. 
*/ -void accumulator_with_spacing(const CSR_Matrix *A, const int *idx_map, double *out, +void accumulator_with_spacing(const CSR_matrix *A, const int *idx_map, double *out, int spacing); #endif /* CSR_SUM_H */ diff --git a/include/utils/linalg_dense_sparse_matmuls.h b/include/utils/linalg_dense_sparse_matmuls.h index 949b2da..b76c3a3 100644 --- a/include/utils/linalg_dense_sparse_matmuls.h +++ b/include/utils/linalg_dense_sparse_matmuls.h @@ -18,26 +18,31 @@ #ifndef LINALG_DENSE_SPARSE_H #define LINALG_DENSE_SPARSE_H -#include "CSC_Matrix.h" -#include "CSR_Matrix.h" +#include "CSC_matrix.h" +#include "CSR_matrix.h" #include "matrix.h" -/* C = (I_p kron A) @ J via the polymorphic Matrix interface. - * A is dense m x n, J is (n*p) x k in CSC, C is (m*p) x k in CSC. */ +/* C = (I_p kron A) @ J via the polymorphic matrix interface. + * A is dense m x n, J is (n*p) x k in CSC_matrix, C is (m*p) x k in CSC_matrix. + * `work` must be sized at least A->n doubles — used as a scratch buffer when + * a sparse column of J needs to be densified before dgemv. The caller is + * responsible for sizing it (typically pre-sized in the corresponding + * sparsity-build step). */ // TODO: maybe we can replace these with I_kron_X functionality? -CSC_Matrix *I_kron_A_alloc(const Matrix *A, const CSC_Matrix *J, int p); -void I_kron_A_fill_values(const Matrix *A, const CSC_Matrix *J, CSC_Matrix *C); +CSC_matrix *I_kron_A_alloc(const matrix *A, const CSC_matrix *J, int p); +void I_kron_A_fill_values(const matrix *A, const CSC_matrix *J, CSC_matrix *C, + double *work); /* Sparsity and values of C = (Y^T kron I_m) @ J where Y is k x n, J is (m*k) x p, and C is (m*n) x p. Y is given in column-major dense format. 
*/ -CSR_Matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_Matrix *J); -void YT_kron_I_fill_values(int m, int k, int n, const double *Y, const CSC_Matrix *J, - CSR_Matrix *C); +CSR_matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_matrix *J); +void YT_kron_I_fill_values(int m, int k, int n, const double *Y, const CSC_matrix *J, + CSR_matrix *C); /* Sparsity and values of C = (I_n kron X) @ J where X is m x k (col-major dense), J is (k*n) x p, and C is (m*n) x p. */ -CSR_Matrix *I_kron_X_alloc(int m, int k, int n, const CSC_Matrix *J); -void I_kron_X_fill_values(int m, int k, int n, const double *X, const CSC_Matrix *J, - CSR_Matrix *C); +CSR_matrix *I_kron_X_alloc(int m, int k, int n, const CSC_matrix *J); +void I_kron_X_fill_values(int m, int k, int n, const double *X, const CSC_matrix *J, + CSR_matrix *C); #endif /* LINALG_DENSE_SPARSE_H */ diff --git a/include/utils/linalg_sparse_matmuls.h b/include/utils/linalg_sparse_matmuls.h index a833078..516ff18 100644 --- a/include/utils/linalg_sparse_matmuls.h +++ b/include/utils/linalg_sparse_matmuls.h @@ -18,41 +18,41 @@ #ifndef LINALG_H #define LINALG_H -#include "CSC_Matrix.h" -#include "CSR_Matrix.h" +#include "CSC_matrix.h" +#include "CSR_matrix.h" /* Compute sparsity pattern and values for the matrix-matrix multiplication C = (I_p kron A) @ J where A is m x n, J is (n*p) x k, and C is (m*p) x k, without relying on generic sparse matrix-matrix multiplication. Specialized logic for this is much faster (50-100x) than generic sparse matmul. 
- * J is provided in CSC format and is split into p blocks of n rows each - * C is returned in CSC format + * J is provided in CSC_matrix format and is split into p blocks of n rows each + * C is returned in CSC_matrix format * Mathematically it corresponds to C = [A @ J1; A @ J2; ...; A @ Jp], where J = [J1; J2; ...; Jp] */ -CSC_Matrix *block_left_multiply_fill_sparsity(const CSR_Matrix *A, - const CSC_Matrix *J, int p); +CSC_matrix *block_left_multiply_fill_sparsity(const CSR_matrix *A, + const CSC_matrix *J, int p); -void block_left_multiply_fill_values(const CSR_Matrix *A, const CSC_Matrix *J, - CSC_Matrix *C); +void block_left_multiply_fill_values(const CSR_matrix *A, const CSC_matrix *J, + CSC_matrix *C); /* Compute y = kron(I_p, A) @ x where A is m x n and x is(n*p)-length vector. The output y is m*p-length vector corresponding to y = [A @ x1; A @ x2; ...; A @ xp] where x is divided into p blocks of n elements. */ -void block_left_multiply_vec(const CSR_Matrix *A, const double *x, double *y, int p); +void block_left_multiply_vec(const CSR_matrix *A, const double *x, double *y, int p); -/* Fill values of C = A @ B where A is CSR, B is CSC. +/* Fill values of C = A @ B where A is CSR_matrix, B is CSC_matrix. * C must have sparsity pattern already computed. */ -void csr_csc_matmul_fill_values(const CSR_Matrix *A, const CSC_Matrix *B, - CSR_Matrix *C); +void csr_csc_matmul_fill_values(const CSR_matrix *A, const CSC_matrix *B, + CSR_matrix *C); -/* C = A @ B where A is CSR, B is CSC. Result C is CSR. +/* C = A @ B where A is CSR_matrix, B is CSC_matrix. Result C is CSR_matrix. * Allocates and precomputes sparsity pattern. No workspace required. 
*/ -CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B); +CSR_matrix *csr_csc_matmul_alloc(const CSR_matrix *A, const CSC_matrix *B); #endif /* LINALG_H */ diff --git a/include/utils/matrix.h b/include/utils/matrix.h index c54443a..17a84ad 100644 --- a/include/utils/matrix.h +++ b/include/utils/matrix.h @@ -18,38 +18,135 @@ #ifndef MATRIX_H #define MATRIX_H -#include "CSC_Matrix.h" -#include "CSR_Matrix.h" +#include "CSC_matrix.h" +#include "CSR_matrix.h" +#include -/* Base matrix type with function pointers for polymorphic dispatch */ -typedef struct Matrix +/* Broadcast shape used by the broadcast atom and its vtable methods. */ +typedef enum { - int m, n; - void (*block_left_mult_vec)(const struct Matrix *self, const double *x, - double *y, int p); - CSC_Matrix *(*block_left_mult_sparsity)(const struct Matrix *self, - const CSC_Matrix *J, int p); - void (*block_left_mult_values)(const struct Matrix *self, const CSC_Matrix *J, - CSC_Matrix *C); - void (*update_values)(struct Matrix *self, const double *new_values); - void (*free_fn)(struct Matrix *self); -} Matrix; - -/* Sparse matrix wrapping CSR */ -typedef struct Sparse_Matrix + BROADCAST_ROW, /* (1, n) -> (m, n) */ + BROADCAST_COL, /* (m, 1) -> (m, n) */ + BROADCAST_SCALAR /* (1, 1) -> (m, n) */ +} broadcast_type; + +/* Polymorphic matrix base. Concrete types embed `matrix` as their first + member and implement the vtable slots below. Currently implemented: + 1. sparse_matrix — generic CSR_matrix-backed matrix. + 2. permuted_dense — matrix whose nonzeros lie in a single dense block + located at chosen rows and columns of the global + index space. + A third type is potentially planned. 
*/ + +typedef struct matrix matrix; + +/* y = kron(I_p, A) @ x */ +typedef void (*matrix_block_left_mult_vec_fn)(const matrix *A, const double *x, + double *y, int p); + +/* Allocate sparsity of C = kron(I_p, A) @ J */ +typedef CSC_matrix *(*matrix_block_left_mult_sparsity_fn)(const matrix *A, + const CSC_matrix *J, + int p); + +/* Fill values of C = kron(I_p, A) @ J */ +typedef void (*matrix_block_left_mult_values_fn)(const matrix *A, + const CSC_matrix *J, CSC_matrix *C); + +/* Allocate a new matrix with the same sparsity as A */ +typedef matrix *(*matrix_copy_sparsity_fn)(const matrix *A); + +/* Fill values of C = diag(d) @ A */ +typedef void (*matrix_DA_fill_values_fn)(const double *d, const matrix *A, + matrix *C); + +/* Allocate C = AT @ A */ +typedef matrix *(*matrix_ATA_alloc_fn)(matrix *A); + +/* Fill values of C = AT @ diag(d) @ A */ +typedef void (*matrix_ATDA_fill_values_fn)(const matrix *A, const double *d, + matrix *C); + +/* Allocate AT = transpose(A) */ +typedef matrix *(*matrix_transpose_alloc_fn)(const matrix *A); + +/* Fill values of AT = transpose(A) */ +typedef void (*matrix_transpose_fill_values_fn)(const matrix *A, matrix *AT); + +/* Returns a CSR_matrix view of A */ +typedef CSR_matrix *(*matrix_to_csr_fn)(matrix *A); + +/* Refresh any internal caches (e.g. a CSC_matrix mirror) so subsequent ATA / + ATDA calls reflect the current values. */ +typedef void (*matrix_refresh_csc_values_fn)(matrix *A); + +/* Allocate C = A[indices, :] */ +typedef matrix *(*matrix_index_alloc_fn)(matrix *A, const int *indices, int n_idxs); + +/* Fill values of C = A[indices, :] */ +typedef void (*matrix_index_fill_values_fn)(matrix *A, const int *indices, + int n_idxs, matrix *C); + +/* Row-tiling for the promote atom: A must be a 1-row matrix; returns + a new matrix of shape (size, A->n) where every row is a copy of A's + single row. 
*/ +typedef matrix *(*matrix_promote_alloc_fn)(matrix *A, int size); +typedef void (*matrix_promote_fill_values_fn)(matrix *A, matrix *out); + +/* Broadcast: lift the child Jacobian of a broadcast atom into the output + Jacobian. `type` is the broadcast variant; (d1, d2) is the output shape. */ +typedef matrix *(*matrix_broadcast_alloc_fn)(matrix *A, broadcast_type type, int d1, + int d2); +typedef void (*matrix_broadcast_fill_values_fn)(matrix *A, broadcast_type type, + int d1, int d2, matrix *out); + +/* diag_vec: A is an (n, A->n) Jacobian for a length-n vector; output is + (n*n, A->n) where row i lands at output row i*(n+1) (column-major + diagonal positions). Other output rows are structurally zero. */ +typedef matrix *(*matrix_diag_vec_alloc_fn)(matrix *A); +typedef void (*matrix_diag_vec_fill_values_fn)(matrix *A, matrix *out); + +typedef void (*matrix_free_fn)(matrix *self); + +struct matrix { - Matrix base; - CSR_Matrix *csr; -} Sparse_Matrix; + int m, n, nnz; + double *x; /* non-owning pointer to the value buffer */ + bool is_permuted_dense; + + /* Operator ops */ + matrix_block_left_mult_vec_fn block_left_mult_vec; + matrix_block_left_mult_sparsity_fn block_left_mult_sparsity; + matrix_block_left_mult_values_fn block_left_mult_values; + + /* Chain-rule ops */ + matrix_copy_sparsity_fn copy_sparsity; + matrix_DA_fill_values_fn DA_fill_values; + matrix_ATA_alloc_fn ATA_alloc; + matrix_ATDA_fill_values_fn ATDA_fill_values; + matrix_transpose_alloc_fn transpose_alloc; + matrix_transpose_fill_values_fn transpose_fill_values; + + /* Views and cache */ + matrix_to_csr_fn to_csr; + matrix_refresh_csc_values_fn refresh_csc_values; -/* Constructors */ -Matrix *new_sparse_matrix(const CSR_Matrix *A); + /* Atom-specific ops */ + matrix_index_alloc_fn index_alloc; + matrix_index_fill_values_fn index_fill_values; + matrix_promote_alloc_fn promote_alloc; + matrix_promote_fill_values_fn promote_fill_values; + matrix_broadcast_alloc_fn broadcast_alloc; + 
matrix_broadcast_fill_values_fn broadcast_fill_values; + matrix_diag_vec_alloc_fn diag_vec_alloc; + matrix_diag_vec_fill_values_fn diag_vec_fill_values; -/* Transpose helper */ -Matrix *sparse_matrix_trans(const Sparse_Matrix *self, int *iwork); + /* Lifecycle */ + matrix_free_fn free_fn; +}; /* Free helper */ -static inline void free_matrix(Matrix *m) +static inline void free_matrix(matrix *m) { if (m) { diff --git a/include/utils/matrix_BTA.h b/include/utils/matrix_BTA.h new file mode 100644 index 0000000..05b6d58 --- /dev/null +++ b/include/utils/matrix_BTA.h @@ -0,0 +1,43 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the SparseDiffEngine project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ +#ifndef MATRIX_BTA_H +#define MATRIX_BTA_H + +#include "matrix.h" +#include "permuted_dense.h" + +/* Polymorphic dispatchers for C = BT @ A and C = BT @ diag(d) @ A. The output + type depends on the input types: (PD, PD) → PD, (Sparse, PD) → PD, + (PD, Sparse) → PD, (Sparse, Sparse) → Sparse. (Here PD = permuted_dense.) + + Contract: neither function touches sparse_matrix internals. The caller must, + before calling either function, ensure each Sparse operand's csc_cache + exists (sparse_matrix_ensure_csc_cache). Before BTDA_matrices_fill_values + the caller must also refresh the cache values (refresh_csc_values). */ + +/* Allocate sparsity for C = BT @ A. */ +matrix *BTA_matrices_alloc(matrix *A, matrix *B); + +/* Fill values of C = BT @ diag(d) @ A. */ +void BTDA_matrices_fill_values(matrix *A, const double *d, matrix *B, matrix *C); + +/* Polymorphic dispatcher: C = B @ A where B is PD and A is any matrix + type (permuted_dense or sparse_matrix). C is always PD. Routes on A's + type. 
For the sparse-A branch the dispatcher ensures sm_A->csc_cache + structure exists at alloc time; before BA_pd_matrices_fill_values the + caller must have refreshed sm_A->csc_cache values (same fill-side + contract as BTDA_matrices_fill_values). */ +matrix *BA_pd_matrices_alloc(const permuted_dense *B, const matrix *A); +void BA_pd_matrices_fill_values(const permuted_dense *B, const matrix *A, + permuted_dense *C); + +#endif /* MATRIX_BTA_H */ diff --git a/include/utils/dense_matrix.h b/include/utils/matrix_sum.h similarity index 53% rename from include/utils/dense_matrix.h rename to include/utils/matrix_sum.h index bd1693b..95c6fcf 100644 --- a/include/utils/dense_matrix.h +++ b/include/utils/matrix_sum.h @@ -15,26 +15,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#ifndef DENSE_MATRIX_H -#define DENSE_MATRIX_H +#ifndef MATRIX_SUM_H +#define MATRIX_SUM_H #include "matrix.h" -/* Dense matrix (row-major) */ -typedef struct Dense_Matrix -{ - Matrix base; - double *x; - double *work; /* scratch buffer, length n */ -} Dense_Matrix; +/* Polymorphic wrappers for allocating C = A + B. Right now we always + convert to CSR matrices internally for the sum. */ +void sum_matrices_alloc(matrix *A, matrix *B, matrix *C); -/* Constructors. If data is NULL, the value buffer is allocated but left - uninitialized; otherwise m*n entries are copied from data. */ -Matrix *new_dense_matrix(int m, int n, const double *data); +/* Fill values of C = A + B. Uses CSR matrices internally. */ +void sum_matrices_fill_values(matrix *A, matrix *B, matrix *C); -/* Transpose helper */ -Matrix *dense_matrix_trans(const Dense_Matrix *self); +/* Fill values of C = diag(d1) * A + diag(d2) * B. Uses CSR matrices internally. 
*/ +void sum_scaled_matrices_fill_values(matrix *A, matrix *B, matrix *C, + const double *d1, const double *d2); -void A_transpose(double *AT, const double *A, int m, int n); - -#endif /* DENSE_MATRIX_H */ +#endif /* MATRIX_SUM_H */ diff --git a/include/utils/mini_numpy.h b/include/utils/mini_numpy.h index c87f80a..ac14051 100644 --- a/include/utils/mini_numpy.h +++ b/include/utils/mini_numpy.h @@ -18,7 +18,7 @@ #ifndef MINI_NUMPY_H #define MINI_NUMPY_H -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" /* Example: a = [1, 2], len = 2, repeats = 3, result = [1, 1, 1, 2, 2, 2] */ void repeat(double *result, const double *a, int len, int repeats); @@ -33,6 +33,9 @@ void scaled_ones(double *result, int size, double value); /* Naive implementation of Z = X @ Y, X is m x k, Y is k x n, Z is m x n */ void mat_mat_mult(const double *X, const double *Y, double *Z, int m, int k, int n); +/* Row-major dense transpose: AT[j*m + i] = A[i*n + j] for an m x n A. */ +void A_transpose(double *AT, const double *A, int m, int n); + /* Compute v = (Y kron I_m) @ w where Y is k x n (col-major), len(w) = m * n, and len(v) = m * k. Equivalently, reshape w as the m x n matrix W (col-major) and compute v = vec(W @ Y^T). */ @@ -46,10 +49,10 @@ void I_kron_XT_vec(int m, int k, int n, const double *X, const double *w, double /* Fill T_csr's row pointers and column indices for the 1D full-convolution Toeplitz matrix T(a), sized (m+n-1) x n with m*n nonzeros. Values (x) are not written; call conv_matrix_fill_values to populate them. */ -void conv_matrix_fill_sparsity(CSR_Matrix *T_csr, int m, int n); +void conv_matrix_fill_sparsity(CSR_matrix *T_csr, int m, int n); /* Overwrite T_csr->x from kernel a, using the sparsity already written by conv_matrix_fill_sparsity. T[r, col] = a[r - col]. 
*/ -void conv_matrix_fill_values(CSR_Matrix *T_csr, const double *a); +void conv_matrix_fill_values(CSR_matrix *T_csr, const double *a); #endif /* MINI_NUMPY_H */ diff --git a/include/utils/permuted_dense.h b/include/utils/permuted_dense.h new file mode 100644 index 0000000..4869d97 --- /dev/null +++ b/include/utils/permuted_dense.h @@ -0,0 +1,156 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the SparseDiffEngine project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef PERMUTED_DENSE_H +#define PERMUTED_DENSE_H + +#include "matrix.h" +#include + +/* permuted_dense represents a matrix whose only nonzeros lie in a dense + block, after rows and columns are restricted to chosen subsets of the + global index space. For local indices (ii, jj) with 0 <= ii < m0 + and 0 <= jj < n0, + + M[row_perm[ii], col_perm[jj]] = X[ii, jj]. + + All other entries of M are zero. row_perm and col_perm are stored in + strictly increasing order; the constructor asserts this. */ +typedef struct permuted_dense +{ + matrix base; + int m0; /* rows of dense block (= len(row_perm)) */ + int n0; /* cols of dense block (= len(col_perm)) */ + int *row_perm; /* row_perm[ii] in [0, base.m), sorted */ + int *col_perm; /* col_perm[jj] in [0, base.n), sorted */ + double *X; /* m0 * n0, row-major */ + int *col_inv; /* length base.n: col_inv[col_perm[jj]] = jj, */ + /* otherwise -1; used by `x CSC_matrix` allocation. 
*/ + int *row_inv; /* length base.m: row_inv[row_perm[ii]] = ii, */ + /* otherwise -1; used by index_alloc. */ + CSR_matrix *csr_cache; /* lazy CSR_matrix view built by to_csr; structure */ + /* allocated on first call, values refilled */ + /* on every call. NULL until first call. */ + /* Mutable double-precision BLAS scratch shared across kernels that + operate on this PD. Two non-overlapping roles (a given fill call uses + at most one): + - Y-buffer: holds diag(d_perm) X for ATDA / BTDA_pd_pd (size m0*n0). + - transpose: holds (diag(d) X)^T for the BA_pd_csc-based BTDA + kernels (BTDA_pd_csc and, transitively, BTDA_csc_pd + via its delegate). Size m0*n0 doubles. + Allocated lazily on the first kernel that needs it; grown in place + (free + SP_MALLOC, contents not preserved) if a later kernel needs + more. `dwork == NULL` and `dwork_size == 0` before first use. + Functions taking a const permuted_dense * may still mutate `dwork`. */ + double *dwork; + size_t dwork_size; + + /* Mutable int scratch. Currently only used to hold the row-intersection + index arrays idx_A / idx_B in BTA_pd_pd_fill_values and the + slow path of BTDA_pd_pd_fill_values; allocated by + BTA_pd_pd_alloc for those outputs (NULL on PDs from other + allocators). Fill kernels fall back to a per-call SP_MALLOC if + iwork_size is too small. */ + int *iwork; + size_t iwork_size; + + /* CONTRACT: `dwork` and `iwork` are freely overwritten by every kernel + that takes this PD as input or output — contents do NOT survive + across calls. Do not use them to cache precomputed factors or carry + state between kernel invocations: any subsequent call (ATDA, BTDA, + BTA gather, …) may clobber them without warning. If you need + persistence, add a dedicated field. */ +} permuted_dense; + +/* Constructor. row_perm and col_perm must be strictly increasing in their + respective ranges. If X_data is NULL the value buffer is allocated but + left uninitialized; otherwise m0 * n0 entries are copied. 
*/ +matrix *new_permuted_dense(int m, int n, int m0, int n0, const int *row_perm, + const int *col_perm, const double *X_data); + +/* Convenience constructor for the trivial-perm case: row_perm = [0..m-1], + col_perm = [0..n-1], dense block fills the full (m, n) shape. */ +matrix *new_permuted_dense_full(int m, int n, const double *data); + +/* CSR_matrix view: callers should use the vtable, i.e. base.to_csr(base). The PD + owns and caches the returned CSR_matrix; its value array aliases self->X, + so values are always live with no separate fill needed. Callers must not + free the returned CSR_matrix — it's released by free_matrix on the PD. */ + +/* Fill values of C = diag(d) @ A where len(d) = number of (global) rows of A */ +void DA_pd_fill_values(const double *d, const permuted_dense *A, permuted_dense *C); + +/* Allocate new permuted dense for C = AT @ A */ +matrix *ATA_pd_alloc(const permuted_dense *A); + +/* Fill values of C = AT @ diag(d) @ A */ +void ATDA_pd_fill_values(const permuted_dense *A, const double *d, + permuted_dense *C); + +/* Allocate new permuted dense for C = BT @ A where A and B are both permuted_dense. + (If B and A have no overlapping rows, then C is empty) */ +matrix *BTA_pd_pd_alloc(const permuted_dense *B, const permuted_dense *A); + +/* Fill values of C = BT @ A where A and B are both permuted dense. */ +void BTA_pd_pd_fill_values(const permuted_dense *B, const permuted_dense *A, + permuted_dense *C); + +/* Fill values of C = BT @ diag(d) @ A where A and B are both permuted dense. */ +void BTDA_pd_pd_fill_values(const permuted_dense *B, const double *d, + const permuted_dense *A, permuted_dense *C); + +/* Allocate new permuted dense for C = B @ A where B is PD and A is CSC. + This function is currently never used in production, but we keep it + here because it is simple and might be useful in the future. 
*/ +matrix *BA_pd_csc_alloc(const permuted_dense *B, const CSC_matrix *A); + +/* Fill values of C = B @ A where B is the value buffer of a permuted_dense and A is CSC. + + The raw-buffer signature for B lets callers pass a transposed dense block + (e.g. (diag(d) B)^T stored in B->dwork) without needing to build a transposed + permuted dense. */ +void BA_pd_csc_fill_values(const double *B, int n0_B, const int *inv, + const CSC_matrix *A, permuted_dense *C); + +/* Allocate new permuted dense for C = B @ A where B and A are both PD. Both + may have arbitrary (sorted) row_perm / col_perm; no full-block assumption. + If B->col_perm and A->row_perm have no overlap C is structurally empty; + otherwise C has row_perm = B->row_perm, col_perm = A->col_perm. */ +matrix *BA_pd_pd_alloc(const permuted_dense *B, const permuted_dense *A); + +/* Fill values of C = B @ A for two PDs (general row_perm / col_perm). + Intersects B->col_perm with A->row_perm, gathers the matching column + slice of B and row slice of A into the operands' dwork scratch, and + computes one cblas_dgemm. */ +void BA_pd_pd_fill_values(const permuted_dense *B, const permuted_dense *A, + permuted_dense *C); + +/* Allocate new permuted dense for C = B^T @ A where B is PD and A is CSC */ +matrix *BTA_pd_csc_alloc(const permuted_dense *B, const CSC_matrix *A); + +/* Fill values of C = B^T @ diag(d) @ A where B is PD and A is CSC */ +void BTDA_pd_csc_fill_values(const permuted_dense *B, const double *d, + const CSC_matrix *A, permuted_dense *C); + +/* Allocate new permuted_dense for C = B^T @ A where B is Sparse CSC and A is PD. 
*/ +matrix *BTA_csc_pd_alloc(const CSC_matrix *B, const permuted_dense *A); + +/* Fill values of C = B^T @ diag(d) @ A where B is CSC and A is PD */ +void BTDA_csc_pd_fill_values(const CSC_matrix *B, const double *d, + const permuted_dense *A, permuted_dense *C); + +#endif /* PERMUTED_DENSE_H */ diff --git a/include/utils/sparse_matrix.h b/include/utils/sparse_matrix.h new file mode 100644 index 0000000..210daa1 --- /dev/null +++ b/include/utils/sparse_matrix.h @@ -0,0 +1,56 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the SparseDiffEngine project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef SPARSE_MATRIX_H +#define SPARSE_MATRIX_H + +#include "matrix.h" + +/* Sparse matrix wrapping CSR_matrix. csc_cache is a lazily-built CSC_matrix mirror + used by the chain-rule ATA / ATDA paths; it's allocated on first need and refilled + by refresh_csc_values. csc_iwork is the workspace for csr_to_csc. */ +typedef struct sparse_matrix +{ + matrix base; + CSR_matrix *csr; + CSC_matrix *csc_cache; + int *csc_iwork; + int *transpose_iwork; /* sized csr->n; allocated by sparse_transpose_alloc + on the output sm and reused by + sparse_transpose_fill_values. NULL when this + sm wasn't produced by transpose_alloc. */ +} sparse_matrix; + +/* Constructor. Takes ownership of A; the caller must not free A separately + (free_matrix on the returned matrix frees A). 
*/ +matrix *new_sparse_matrix(CSR_matrix *A); + +/* Convenience: allocate a sparse_matrix of shape (m, n) with capacity for + nnz entries. Equivalent to new_sparse_matrix(new_CSR_matrix(m, n, nnz)). + Sparsity pattern and values are uninitialized. */ +matrix *new_sparse_matrix_alloc(int m, int n, int nnz); + +/* Transpose helper */ +matrix *sparse_matrix_trans(const sparse_matrix *self, int *iwork); + +/* Build the CSC_matrix cache structure if absent. Idempotent; structure-only, + values are NOT filled (use refresh_csc_values for that). Exposed so the + bivariate dispatchers in matrix_BTA can prepare sparsity without touching + uninitialized values during the init phase. */ +void sparse_matrix_ensure_csc_cache(sparse_matrix *sm); + +#endif /* SPARSE_MATRIX_H */ diff --git a/include/utils/utils.h b/include/utils/utils.h index 8346858..5c3c5fe 100644 --- a/include/utils/utils.h +++ b/include/utils/utils.h @@ -18,9 +18,23 @@ #ifndef UTILS_H #define UTILS_H +#include + +#ifndef MAX #define MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif +#ifndef MIN +#define MIN(a, b) ((a) < (b) ? (a) : (b)) +#endif /* Sort an array of integers in ascending order */ void sort_int_array(int *array, int size); +/* Return true if sorted index arrays a_idx and b_idx (lengths a_len, b_len) + share any value, where b_idx entries are shifted by b_offset before + comparison (a_idx[ai] == b_idx[bi] - b_offset). Use b_offset = 0 for a + plain intersection check. */ +bool has_overlap(const int *a_idx, int a_len, const int *b_idx, int b_len, + int b_offset); + #endif // UTILS_H diff --git a/src/atoms/affine/add.c b/src/atoms/affine/add.c index 631ffb6..288bfde 100644 --- a/src/atoms/affine/add.c +++ b/src/atoms/affine/add.c @@ -16,7 +16,8 @@ * limitations under the License. 
*/ #include "atoms/affine.h" -#include "utils/CSR_sum.h" +#include "utils/matrix_sum.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -43,10 +44,10 @@ static void jacobian_init_impl(expr *node) /* we never have to store more than the sum of children's nnz */ int nnz_max = node->left->jacobian->nnz + node->right->jacobian->nnz; - node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz_max); + node->jacobian = new_sparse_matrix_alloc(node->size, node->n_vars, nnz_max); - /* fill sparsity pattern */ - sum_csr_alloc(node->left->jacobian, node->right->jacobian, node->jacobian); + /* fill sparsity pattern */ + sum_matrices_alloc(node->left->jacobian, node->right->jacobian, node->jacobian); } static void eval_jacobian(expr *node) @@ -56,7 +57,8 @@ static void eval_jacobian(expr *node) node->right->eval_jacobian(node->right); /* sum children's jacobians */ - sum_csr_fill_values(node->left->jacobian, node->right->jacobian, node->jacobian); + sum_matrices_fill_values(node->left->jacobian, node->right->jacobian, + node->jacobian); } static void wsum_hess_init_impl(expr *node) @@ -67,10 +69,11 @@ static void wsum_hess_init_impl(expr *node) /* we never have to store more than the sum of children's nnz */ int nnz_max = node->left->wsum_hess->nnz + node->right->wsum_hess->nnz; - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz_max); + node->wsum_hess = new_sparse_matrix_alloc(node->n_vars, node->n_vars, nnz_max); /* fill sparsity pattern of hessian */ - sum_csr_alloc(node->left->wsum_hess, node->right->wsum_hess, node->wsum_hess); + sum_matrices_alloc(node->left->wsum_hess, node->right->wsum_hess, + node->wsum_hess); } static void eval_wsum_hess(expr *node, const double *w) @@ -80,8 +83,8 @@ static void eval_wsum_hess(expr *node, const double *w) node->right->eval_wsum_hess(node->right, w); /* sum children's wsum_hess */ - sum_csr_fill_values(node->left->wsum_hess, node->right->wsum_hess, - node->wsum_hess); + 
sum_matrices_fill_values(node->left->wsum_hess, node->right->wsum_hess, + node->wsum_hess); } static bool is_affine(const expr *node) diff --git a/src/atoms/affine/broadcast.c b/src/atoms/affine/broadcast.c index 94e32e1..213461b 100644 --- a/src/atoms/affine/broadcast.c +++ b/src/atoms/affine/broadcast.c @@ -17,7 +17,6 @@ */ #include "atoms/affine.h" #include "subexpr.h" -#include "utils/mini_numpy.h" #include "utils/tracked_alloc.h" #include #include @@ -71,119 +70,21 @@ static void jacobian_init_impl(expr *node) { expr *x = node->left; jacobian_init(x); - broadcast_expr *bcast = (broadcast_expr *) node; - int total_nnz; - - // -------------------------------------------------------------------- - // count number of nonzeros - // -------------------------------------------------------------------- - if (bcast->type == BROADCAST_ROW) - { - /* Row broadcast: (1, n) -> (m, n) */ - total_nnz = x->jacobian->nnz * node->d1; - } - else if (bcast->type == BROADCAST_COL) - { - /* Column broadcast: (m, 1) -> (m, n) */ - total_nnz = x->jacobian->nnz * node->d2; - } - else - { - /* Scalar broadcast: (1, 1) -> (m, n) */ - total_nnz = x->jacobian->nnz * node->size; - } - - node->jacobian = new_csr_matrix(node->size, node->n_vars, total_nnz); - - // --------------------------------------------------------------------- - // fill sparsity pattern - // --------------------------------------------------------------------- - CSR_Matrix *Jx = x->jacobian; - CSR_Matrix *J = node->jacobian; - - if (bcast->type == BROADCAST_ROW) - { - J->nnz = 0; - for (int i = 0; i < node->d2; i++) - { - int nnz_in_row = Jx->p[i + 1] - Jx->p[i]; - - /* copy columns indices */ - tile_int(J->i + J->nnz, Jx->i + Jx->p[i], nnz_in_row, node->d1); - - /* set row pointers */ - for (int rep = 0; rep < node->d1; rep++) - { - J->p[i * node->d1 + rep] = J->nnz; - J->nnz += nnz_in_row; - } - } - assert(J->nnz == total_nnz); - J->p[node->size] = total_nnz; - } - else if (bcast->type == BROADCAST_COL) - { - /* copy 
column indices */ - tile_int(J->i, Jx->i, Jx->nnz, node->d2); - /* set row pointers */ - int offset = 0; - for (int i = 0; i < node->d2; i++) - { - for (int j = 0; j < node->d1; j++) - { - int nnz_in_row = Jx->p[j + 1] - Jx->p[j]; - J->p[i * node->d1 + j] = offset; - offset += nnz_in_row; - } - } - assert(offset == total_nnz); - J->p[node->size] = total_nnz; - } - else - { - /* copy column indices */ - tile_int(J->i, Jx->i, Jx->nnz, node->size); - - /* set row pointers */ - int offset = 0; - int nnz = Jx->p[1] - Jx->p[0]; - for (int i = 0; i < node->size; i++) - { - J->p[i] = offset; - offset += nnz; - } - assert(offset == total_nnz); - J->p[node->size] = total_nnz; - } + /* allocate sparsity for the broadcast output; output type matches child's. */ + broadcast_expr *bcast = (broadcast_expr *) node; + node->jacobian = + x->jacobian->broadcast_alloc(x->jacobian, bcast->type, node->d1, node->d2); } static void eval_jacobian(expr *node) { node->left->eval_jacobian(node->left); + /* fill values into the preallocated output. 
*/ broadcast_expr *bcast = (broadcast_expr *) node; - CSR_Matrix *Jx = node->left->jacobian; - CSR_Matrix *J = node->jacobian; - - if (bcast->type == BROADCAST_ROW) - { - J->nnz = 0; - for (int i = 0; i < node->d2; i++) - { - int nnz_in_row = Jx->p[i + 1] - Jx->p[i]; - tile_double(J->x + J->nnz, Jx->x + Jx->p[i], nnz_in_row, node->d1); - J->nnz += nnz_in_row * node->d1; - } - } - else if (bcast->type == BROADCAST_COL) - { - tile_double(J->x, Jx->x, Jx->nnz, node->d2); - } - else - { - tile_double(J->x, Jx->x, Jx->nnz, node->size); - } + node->left->jacobian->broadcast_fill_values(node->left->jacobian, bcast->type, + node->d1, node->d2, node->jacobian); } static void wsum_hess_init_impl(expr *node) @@ -192,7 +93,7 @@ static void wsum_hess_init_impl(expr *node) wsum_hess_init(x); /* Same sparsity as child - weights get summed */ - node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); + node->wsum_hess = x->wsum_hess->copy_sparsity(x->wsum_hess); /* allocate space for weight vector */ node->work->dwork = SP_MALLOC(node->size * sizeof(double)); @@ -239,7 +140,8 @@ static void eval_wsum_hess(expr *node, const double *w) } x->eval_wsum_hess(x, node->work->dwork); - memcpy(node->wsum_hess->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double)); + memcpy(node->wsum_hess->x, x->wsum_hess->x, + node->wsum_hess->nnz * sizeof(double)); } static bool is_affine(const expr *node) diff --git a/src/atoms/affine/convolve.c b/src/atoms/affine/convolve.c index 55e5857..9cdfeb7 100644 --- a/src/atoms/affine/convolve.c +++ b/src/atoms/affine/convolve.c @@ -17,9 +17,10 @@ */ #include "atoms/affine.h" #include "subexpr.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" #include "utils/linalg_sparse_matmuls.h" #include "utils/mini_numpy.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -76,13 +77,14 @@ static void jacobian_init_impl(expr *node) jacobian_init(child); /* Build convolution matrix of size (m+n-1) x n with m*n nonzeros */ 
- cnode->T = new_csr_matrix(m + n - 1, n, m * n); + cnode->T = new_CSR_matrix(m + n - 1, n, m * n); conv_matrix_fill_sparsity(cnode->T, m, n); conv_matrix_fill_values(cnode->T, a); /* J = T @ J_child */ - cnode->Jchild_CSC = csr_to_csc_alloc(child->jacobian, node->work->iwork); - node->jacobian = csr_csc_matmul_alloc(cnode->T, cnode->Jchild_CSC); + cnode->Jchild_CSC = csr_to_csc_alloc(child->jacobian->to_csr(child->jacobian), node->work->iwork); + node->jacobian = + new_sparse_matrix(csr_csc_matmul_alloc(cnode->T, cnode->Jchild_CSC)); } static void eval_jacobian(expr *node) @@ -93,8 +95,10 @@ static void eval_jacobian(expr *node) child->eval_jacobian(child); /* J = T @ J_child */ - csr_to_csc_fill_values(child->jacobian, cnode->Jchild_CSC, node->work->iwork); - csr_csc_matmul_fill_values(cnode->T, cnode->Jchild_CSC, node->jacobian); + csr_to_csc_fill_values(child->jacobian->to_csr(child->jacobian), cnode->Jchild_CSC, + node->work->iwork); + csr_csc_matmul_fill_values(cnode->T, cnode->Jchild_CSC, + node->jacobian->to_csr(node->jacobian)); } static void wsum_hess_init_impl(expr *node) @@ -103,7 +107,7 @@ static void wsum_hess_init_impl(expr *node) convolve_expr *cnode = (convolve_expr *) node; wsum_hess_init(child); - node->wsum_hess = new_csr_copy_sparsity(child->wsum_hess); + node->wsum_hess = child->wsum_hess->copy_sparsity(child->wsum_hess); node->work->dwork = (double *) SP_MALLOC(cnode->n * sizeof(double)); } @@ -127,7 +131,7 @@ static void eval_wsum_hess(expr *node, const double *w) child->eval_wsum_hess(child, w_prime); memcpy(node->wsum_hess->x, child->wsum_hess->x, - child->wsum_hess->nnz * sizeof(double)); + node->wsum_hess->nnz * sizeof(double)); } static bool is_affine(const expr *node) @@ -138,8 +142,8 @@ static bool is_affine(const expr *node) static void free_type_data(expr *node) { convolve_expr *cnode = (convolve_expr *) node; - free_csr_matrix(cnode->T); - free_csc_matrix(cnode->Jchild_CSC); + free_CSR_matrix(cnode->T); + 
free_CSC_matrix(cnode->Jchild_CSC); free_expr(cnode->param_source); } diff --git a/src/atoms/affine/diag_mat.c b/src/atoms/affine/diag_mat.c index e168d02..0d5a7c1 100644 --- a/src/atoms/affine/diag_mat.c +++ b/src/atoms/affine/diag_mat.c @@ -28,7 +28,7 @@ expr *new_diag_mat(expr *child) assert(child->d1 == child->d2); int n = child->d1; - int *indices = (int *) malloc((size_t) n * sizeof(int)); + int *indices = (int *) malloc(n * sizeof(int)); for (int i = 0; i < n; i++) { indices[i] = i * (n + 1); diff --git a/src/atoms/affine/diag_vec.c b/src/atoms/affine/diag_vec.c index bd94c05..03fbd45 100644 --- a/src/atoms/affine/diag_vec.c +++ b/src/atoms/affine/diag_vec.c @@ -48,49 +48,18 @@ static void forward(expr *node, const double *u) static void jacobian_init_impl(expr *node) { expr *x = node->left; - int n = x->size; jacobian_init(x); - CSR_Matrix *Jx = x->jacobian; - CSR_Matrix *J = new_csr_matrix(node->size, node->n_vars, Jx->nnz); - - /* Output has n^2 rows but only n diagonal positions are non-empty. - * Diagonal position i is at row i*(n+1) in Fortran order. */ - int nnz = 0; - int next_diag = 0; - for (int row = 0; row < node->size; row++) - { - J->p[row] = nnz; - if (row == next_diag) - { - int child_row = row / (n + 1); - int len = Jx->p[child_row + 1] - Jx->p[child_row]; - memcpy(J->i + nnz, Jx->i + Jx->p[child_row], len * sizeof(int)); - nnz += len; - next_diag += n + 1; - } - } - J->p[node->size] = nnz; - - node->jacobian = J; + /* output type matches child's; rows i*(n+1) hold child row i, others zero. 
*/ + node->jacobian = x->jacobian->diag_vec_alloc(x->jacobian); } static void eval_jacobian(expr *node) { - expr *x = node->left; - int n = x->size; - x->eval_jacobian(x); - - CSR_Matrix *J = node->jacobian; - CSR_Matrix *Jx = x->jacobian; + node->left->eval_jacobian(node->left); - /* Copy values from child row i to output diagonal row i*(n+1) */ - for (int i = 0; i < n; i++) - { - int out_row = i * (n + 1); - int len = J->p[out_row + 1] - J->p[out_row]; - memcpy(J->x + J->p[out_row], Jx->x + Jx->p[i], len * sizeof(double)); - } + /* fill the diagonal rows of the preallocated output. */ + node->left->jacobian->diag_vec_fill_values(node->left->jacobian, node->jacobian); } static void wsum_hess_init_impl(expr *node) @@ -105,8 +74,7 @@ static void wsum_hess_init_impl(expr *node) /* Copy child's Hessian structure (diag_vec is linear, so its own Hessian is * zero) */ - CSR_Matrix *Hx = x->wsum_hess; - node->wsum_hess = new_csr_copy_sparsity(Hx); + node->wsum_hess = x->wsum_hess->copy_sparsity(x->wsum_hess); } static void eval_wsum_hess(expr *node, const double *w) @@ -122,7 +90,8 @@ static void eval_wsum_hess(expr *node, const double *w) /* Evaluate child's Hessian with extracted weights */ x->eval_wsum_hess(x, node->work->dwork); - memcpy(node->wsum_hess->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double)); + memcpy(node->wsum_hess->x, x->wsum_hess->x, + node->wsum_hess->nnz * sizeof(double)); } static bool is_affine(const expr *node) diff --git a/src/atoms/affine/hstack.c b/src/atoms/affine/hstack.c index da670d2..4bf416e 100644 --- a/src/atoms/affine/hstack.c +++ b/src/atoms/affine/hstack.c @@ -16,7 +16,9 @@ * limitations under the License. 
*/ #include "atoms/affine.h" +#include "subexpr.h" #include "utils/CSR_sum.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -56,17 +58,16 @@ static void jacobian_init_impl(expr *node) nnz += hnode->args[i]->jacobian->nnz; } - node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz); + CSR_matrix *A = new_CSR_matrix(node->size, node->n_vars, nnz); /* precompute sparsity pattern of this node's jacobian */ int row_offset = 0; - CSR_Matrix *A = node->jacobian; A->nnz = 0; for (int i = 0; i < hnode->n_args; i++) { expr *child = hnode->args[i]; - CSR_Matrix *B = child->jacobian; + CSR_matrix *B = child->jacobian->to_csr(child->jacobian); /* copy columns */ memcpy(A->i + A->nnz, B->i, B->nnz * sizeof(int)); @@ -81,23 +82,22 @@ static void jacobian_init_impl(expr *node) row_offset += child->size; } A->p[node->size] = A->nnz; + node->jacobian = new_sparse_matrix(A); } static void eval_jacobian(expr *node) { hstack_expr *hnode = (hstack_expr *) node; - CSR_Matrix *A = node->jacobian; - A->nnz = 0; + node->jacobian->nnz = 0; for (int i = 0; i < hnode->n_args; i++) { expr *child = hnode->args[i]; child->eval_jacobian(child); - /* copy values */ - memcpy(A->x + A->nnz, child->jacobian->x, + memcpy(node->jacobian->x + node->jacobian->nnz, child->jacobian->x, child->jacobian->nnz * sizeof(double)); - A->nnz += child->jacobian->nnz; + node->jacobian->nnz += child->jacobian->nnz; } } @@ -114,25 +114,24 @@ static void wsum_hess_init_impl(expr *node) /* worst-case scenario the nnz of node->wsum_hess is the sum of children's nnz */ - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz); - hnode->CSR_work = new_csr_matrix(node->n_vars, node->n_vars, nnz); + CSR_matrix *H = new_CSR_matrix(node->n_vars, node->n_vars, nnz); + hnode->CSR_work = new_CSR_matrix(node->n_vars, node->n_vars, nnz); /* fill sparsity pattern */ - CSR_Matrix *H = node->wsum_hess; H->nnz = 0; - for (int i = 0; i < hnode->n_args; i++) { - expr *child = 
hnode->args[i]; - copy_csr_matrix(H, hnode->CSR_work); - sum_csr_alloc(hnode->CSR_work, child->wsum_hess, H); + matrix *child_hess = hnode->args[i]->wsum_hess; + copy_CSR_matrix(H, hnode->CSR_work); + sum_csr_alloc(hnode->CSR_work, child_hess->to_csr(child_hess), H); } + node->wsum_hess = new_sparse_matrix(H); } static void wsum_hess_eval(expr *node, const double *w) { hstack_expr *hnode = (hstack_expr *) node; - CSR_Matrix *H = node->wsum_hess; + CSR_matrix *H = node->wsum_hess->to_csr(node->wsum_hess); int row_offset = 0; memset(H->x, 0, H->nnz * sizeof(double)); @@ -140,8 +139,9 @@ static void wsum_hess_eval(expr *node, const double *w) { expr *child = hnode->args[i]; child->eval_wsum_hess(child, w + row_offset); - copy_csr_matrix(H, hnode->CSR_work); - sum_csr_fill_values(hnode->CSR_work, child->wsum_hess, H); + copy_CSR_matrix(H, hnode->CSR_work); + sum_csr_fill_values(hnode->CSR_work, + child->wsum_hess->to_csr(child->wsum_hess), H); row_offset += child->size; } } @@ -169,7 +169,7 @@ static void free_type_data(expr *node) hnode->args[i] = NULL; } - free_csr_matrix(hnode->CSR_work); + free_CSR_matrix(hnode->CSR_work); hnode->CSR_work = NULL; free(hnode->args); hnode->args = NULL; diff --git a/src/atoms/affine/index.c b/src/atoms/affine/index.c index 34c8310..3fd070e 100644 --- a/src/atoms/affine/index.c +++ b/src/atoms/affine/index.c @@ -64,21 +64,10 @@ static void jacobian_init_impl(expr *node) index_expr *idx = (index_expr *) node; jacobian_init(x); - CSR_Matrix *Jx = x->jacobian; - CSR_Matrix *J = new_csr_matrix(node->size, node->n_vars, Jx->nnz); - - /* set sparsity pattern */ - J->p[0] = 0; - for (int i = 0; i < idx->n_idxs; i++) - { - int row = idx->indices[i]; - int len = Jx->p[row + 1] - Jx->p[row]; - memcpy(J->i + J->p[i], Jx->i + Jx->p[row], len * sizeof(int)); - J->p[i + 1] = J->p[i] + len; - } - - J->nnz = J->p[idx->n_idxs]; - node->jacobian = J; + /* allocate sparsity pattern for the matrix consisting of rows + 'idx->indices' of the child's 
Jacobian */ + node->jacobian = + x->jacobian->index_alloc(x->jacobian, idx->indices, idx->n_idxs); } static void eval_jacobian(expr *node) @@ -87,14 +76,9 @@ static void eval_jacobian(expr *node) index_expr *idx = (index_expr *) node; x->eval_jacobian(x); - CSR_Matrix *J = node->jacobian; - CSR_Matrix *Jx = x->jacobian; - - for (int i = 0; i < idx->n_idxs; i++) - { - int len = J->p[i + 1] - J->p[i]; - memcpy(J->x + J->p[i], Jx->x + Jx->p[idx->indices[i]], len * sizeof(double)); - } + /* copy values of the selected rows into the preallocated output */ + x->jacobian->index_fill_values(x->jacobian, idx->indices, idx->n_idxs, + node->jacobian); } static void wsum_hess_init_impl(expr *node) @@ -113,8 +97,7 @@ static void wsum_hess_init_impl(expr *node) many numerical zeros in child->wsum_hess that are actually structural zeros, but we do not try to exploit that sparsity right now. */ - CSR_Matrix *Hx = x->wsum_hess; - node->wsum_hess = new_csr_copy_sparsity(Hx); + node->wsum_hess = x->wsum_hess->copy_sparsity(x->wsum_hess); } static void eval_wsum_hess(expr *node, const double *w) @@ -142,7 +125,8 @@ static void eval_wsum_hess(expr *node, const double *w) /* evalute hessian of child */ x->eval_wsum_hess(x, node->work->dwork); - memcpy(node->wsum_hess->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double)); + memcpy(node->wsum_hess->x, x->wsum_hess->x, + node->wsum_hess->nnz * sizeof(double)); } static bool is_affine(const expr *node) diff --git a/src/atoms/affine/left_matmul.c b/src/atoms/affine/left_matmul.c index c083385..7fd2a13 100644 --- a/src/atoms/affine/left_matmul.c +++ b/src/atoms/affine/left_matmul.c @@ -17,10 +17,14 @@ */ #include "atoms/affine.h" #include "subexpr.h" -#include "utils/dense_matrix.h" +#include "utils/matrix_BTA.h" +#include "utils/mini_numpy.h" +#include "utils/permuted_dense.h" +#include "utils/sparse_matrix.h" #include #include #include +#include /* This file implement the atom 'left_matmul' corresponding to the operation y = A @ f(x), 
where A is a given matrix and f(x) is an arbitrary expression. @@ -78,7 +82,7 @@ static void forward(expr *node, const double *u) node->left->forward(node->left, u); /* y = A_kron @ vec(f(x)) */ - Matrix *A = lnode->A; + matrix *A = lnode->A; int n_blocks = lnode->n_blocks; A->block_left_mult_vec(A, x->value, node->value, n_blocks); } @@ -93,8 +97,8 @@ static void free_type_data(expr *node) left_matmul_expr *lnode = (left_matmul_expr *) node; free_matrix(lnode->A); free_matrix(lnode->AT); - free_csc_matrix(lnode->Jchild_CSC); - free_csc_matrix(lnode->J_CSC); + free_CSC_matrix(lnode->Jchild_CSC); + free_CSC_matrix(lnode->J_CSC); free(lnode->csc_to_csr_work); if (lnode->param_source != NULL) { @@ -108,36 +112,67 @@ static void free_type_data(expr *node) lnode->param_source = NULL; } -static void jacobian_init_impl(expr *node) +/* TODO: use better polymorphism here if you add another matrix type*/ + +/* jacobian_init when node->jacobian is permuted_dense */ +static void jacobian_init_pd(expr *node) { + /* initialize jacobian of child */ expr *x = node->left; left_matmul_expr *lnode = (left_matmul_expr *) node; + jacobian_init(x); + + /* initialize this node's jacobian */ + node->jacobian = BA_pd_matrices_alloc((permuted_dense *) lnode->A, x->jacobian); +} + +/* eval_jacobian when node->jacobian is permuted_dense */ +static void eval_jacobian_pd(expr *node) +{ + /* evaluate jacobian of child */ + left_matmul_expr *lnode = (left_matmul_expr *) node; + expr *x = node->left; + x->eval_jacobian(x); + + /* must refresh CSC cache if x->jacobian is sparse_matrix */ + x->jacobian->refresh_csc_values(x->jacobian); + BA_pd_matrices_fill_values((permuted_dense *) lnode->A, x->jacobian, + (permuted_dense *) node->jacobian); +} - /* initialize child's jacobian and precompute sparsity of its CSC */ +/* jacobian_init when node->jacobian is sparse */ +static void jacobian_init_sparse(expr *node) +{ + /* initialize jacobian of child */ + expr *x = node->left; + left_matmul_expr *lnode 
= (left_matmul_expr *) node; jacobian_init(x); - lnode->Jchild_CSC = csr_to_csc_alloc(x->jacobian, node->work->iwork); - /* precompute sparsity of this node's jacobian in CSC and CSR */ + /* initialize this node's jacobian */ + lnode->Jchild_CSC = + csr_to_csc_alloc(x->jacobian->to_csr(x->jacobian), node->work->iwork); lnode->J_CSC = lnode->A->block_left_mult_sparsity(lnode->A, lnode->Jchild_CSC, lnode->n_blocks); - node->jacobian = csc_to_csr_alloc(lnode->J_CSC, lnode->csc_to_csr_work); + node->jacobian = + new_sparse_matrix(csc_to_csr_alloc(lnode->J_CSC, lnode->csc_to_csr_work)); } -static void eval_jacobian(expr *node) +/* eval_jacobian when node->jacobian is sparse */ +static void eval_jacobian_sparse(expr *node) { + /* evaluate jacobian of child */ left_matmul_expr *lnode = (left_matmul_expr *) node; expr *x = node->left; - - CSC_Matrix *Jchild_CSC = lnode->Jchild_CSC; - CSC_Matrix *J_CSC = lnode->J_CSC; - - /* evaluate child's jacobian and convert to CSC */ x->eval_jacobian(x); - csr_to_csc_fill_values(x->jacobian, Jchild_CSC, node->work->iwork); - /* compute this node's jacobian: */ + /* evaluate this node's jacobian */ + CSC_matrix *Jchild_CSC = lnode->Jchild_CSC; + CSC_matrix *J_CSC = lnode->J_CSC; + csr_to_csc_fill_values(x->jacobian->to_csr(x->jacobian), Jchild_CSC, + node->work->iwork); lnode->A->block_left_mult_values(lnode->A, Jchild_CSC, J_CSC); - csc_to_csr_fill_values(J_CSC, node->jacobian, lnode->csc_to_csr_work); + csc_to_csr_fill_values(J_CSC, node->jacobian->to_csr(node->jacobian), + lnode->csc_to_csr_work); } static void wsum_hess_init_impl(expr *node) @@ -147,7 +182,7 @@ static void wsum_hess_init_impl(expr *node) wsum_hess_init(x); /* allocate this node's hessian with the same sparsity as child's */ - node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); + node->wsum_hess = x->wsum_hess->copy_sparsity(x->wsum_hess); /* work for computing A^T w*/ int n_blocks = ((left_matmul_expr *) node)->n_blocks; @@ -160,7 +195,7 @@ static void 
eval_wsum_hess(expr *node, const double *w) left_matmul_expr *lnode = (left_matmul_expr *) node; /* compute A^T w*/ - Matrix *AT = lnode->AT; + matrix *AT = lnode->AT; int n_blocks = lnode->n_blocks; AT->block_left_mult_vec(AT, w, node->work->dwork, n_blocks); @@ -171,19 +206,17 @@ static void eval_wsum_hess(expr *node, const double *w) static void refresh_dense_left(left_matmul_expr *lnode) { - Dense_Matrix *dm_A = (Dense_Matrix *) lnode->A; - Dense_Matrix *dm_AT = (Dense_Matrix *) lnode->AT; - int m = dm_A->base.m; - int n = dm_A->base.n; + int m = lnode->A->m; + int n = lnode->A->n; /* The parameter represents the A in left_matmul_dense(A, x) in column-major. In this diffengine, we store A in row-major order. Hence, param->vals actually corresponds to the transpose of A, and we transpose AT to get A. */ - memcpy(dm_AT->x, lnode->param_source->value, m * n * sizeof(double)); - A_transpose(dm_A->x, dm_AT->x, n, m); + memcpy(lnode->AT->x, lnode->param_source->value, m * n * sizeof(double)); + A_transpose(lnode->A->x, lnode->AT->x, n, m); } -expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A) +expr *new_left_matmul(expr *param_node, expr *u, const CSR_matrix *A) { /* We expect u->d1 == A->n. However, numpy's broadcasting rules allow users to do A @ u where u is (n, ) which in C is actually (1, n). In that case @@ -212,23 +245,25 @@ expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A) left_matmul_expr *lnode = (left_matmul_expr *) SP_CALLOC(1, sizeof(left_matmul_expr)); expr *node = &lnode->base; - init_expr(node, d1, d2, u->n_vars, forward, jacobian_init_impl, eval_jacobian, - is_affine, wsum_hess_init_impl, eval_wsum_hess, free_type_data); + /* Sparse A — always the general CSC-mirror path. */ + init_expr(node, d1, d2, u->n_vars, forward, jacobian_init_sparse, + eval_jacobian_sparse, is_affine, wsum_hess_init_impl, eval_wsum_hess, + free_type_data); node->left = u; expr_retain(u); /* allocate workspace. 
iwork is used for converting J_child csr to csc - (requiring size node->n_vars) and for transposing A (requiring size A->n). - csc_to_csr_work is used for converting J_CSC to CSR (requiring + (requiring size node->n_vars). + csc_to_csr_work is used for converting J_CSC to CSR_matrix (requiring node->size) */ - node->work->iwork = (int *) SP_MALLOC(MAX(A->n, node->n_vars) * sizeof(int)); + node->work->iwork = (int *) SP_MALLOC(node->n_vars * sizeof(int)); lnode->csc_to_csr_work = (int *) SP_MALLOC(node->size * sizeof(int)); lnode->n_blocks = n_blocks; - /* store A and AT */ - lnode->A = new_sparse_matrix(A); - lnode->AT = - sparse_matrix_trans((const Sparse_Matrix *) lnode->A, node->work->iwork); + /* store A and AT. new_sparse_matrix takes ownership, so clone first. */ + lnode->A = new_sparse_matrix(new_csr(A)); + lnode->AT = lnode->A->transpose_alloc(lnode->A); + lnode->A->transpose_fill_values(lnode->A, lnode->AT); /* parameter support */ lnode->param_source = param_node; @@ -245,6 +280,9 @@ expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A) expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n, const double *data) { + /* TODO: do a helper function for this dimension check (so we can use it in both + * dense and sparse constructors). We could include even more code in that + * functon, all the day down to the parameter support I think*/ int d1, d2, n_blocks; if (u->d1 == n) { @@ -267,8 +305,18 @@ expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n, left_matmul_expr *lnode = (left_matmul_expr *) SP_CALLOC(1, sizeof(left_matmul_expr)); expr *node = &lnode->base; - init_expr(node, d1, d2, u->n_vars, forward, jacobian_init_impl, eval_jacobian, - is_affine, wsum_hess_init_impl, eval_wsum_hess, free_type_data); + /* PD A: the BA_pd_matrices dispatcher applies whenever there is a single + Kronecker block, whether A is constant or parameterized. 
With a + parameter, A's structure is fixed at construction (full-block PD with + trivial permutations); refresh_dense_left updates A->X before each + forward, and eval_jacobian_pd reads those refreshed values via + BA_pd_matrices_fill_values. With n_blocks > 1 the Kronecker structure + forces the general CSC-mirror path. */ + bool pd_path = (n_blocks == 1); + init_expr(node, d1, d2, u->n_vars, forward, + pd_path ? jacobian_init_pd : jacobian_init_sparse, + pd_path ? eval_jacobian_pd : eval_jacobian_sparse, is_affine, + wsum_hess_init_impl, eval_wsum_hess, free_type_data); node->left = u; expr_retain(u); @@ -290,8 +338,8 @@ expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n, lnode->refresh_param_values = refresh_dense_left; /* A and AT buffers are filled by refresh_dense_left from the parameter. */ - lnode->A = new_dense_matrix(m, n, NULL); - lnode->AT = new_dense_matrix(n, m, NULL); + lnode->A = new_permuted_dense_full(m, n, NULL); + lnode->AT = new_permuted_dense_full(n, m, NULL); node->needs_parameter_refresh = true; } /* constant matrix case */ @@ -303,8 +351,9 @@ expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n, exit(1); } - lnode->A = new_dense_matrix(m, n, data); - lnode->AT = dense_matrix_trans((const Dense_Matrix *) lnode->A); + lnode->A = new_permuted_dense_full(m, n, data); + lnode->AT = lnode->A->transpose_alloc(lnode->A); + lnode->A->transpose_fill_values(lnode->A, lnode->AT); } return node; diff --git a/src/atoms/affine/neg.c b/src/atoms/affine/neg.c index f453876..01bbf5e 100644 --- a/src/atoms/affine/neg.c +++ b/src/atoms/affine/neg.c @@ -40,7 +40,7 @@ static void jacobian_init_impl(expr *node) jacobian_init(x); /* same sparsity pattern as child */ - node->jacobian = new_csr_copy_sparsity(x->jacobian); + node->jacobian = x->jacobian->copy_sparsity(x->jacobian); } static void eval_jacobian(expr *node) @@ -49,10 +49,9 @@ static void eval_jacobian(expr *node) node->left->eval_jacobian(node->left); /* negate values 
only (sparsity pattern set in jacobian_init_impl) */ - CSR_Matrix *child_jac = node->left->jacobian; - for (int k = 0; k < child_jac->nnz; k++) + for (int k = 0; k < node->left->jacobian->nnz; k++) { - node->jacobian->x[k] = -child_jac->x[k]; + node->jacobian->x[k] = -node->left->jacobian->x[k]; } } @@ -63,9 +62,8 @@ static void wsum_hess_init_impl(expr *node) /* initialize child's wsum_hess */ wsum_hess_init(x); - /* same sparsity pattern as child */ - CSR_Matrix *child_hess = x->wsum_hess; - node->wsum_hess = new_csr_copy_sparsity(child_hess); + /* same sparsity pattern as child (polymorphic copy) */ + node->wsum_hess = x->wsum_hess->copy_sparsity(x->wsum_hess); } static void eval_wsum_hess(expr *node, const double *w) @@ -74,10 +72,9 @@ static void eval_wsum_hess(expr *node, const double *w) node->left->eval_wsum_hess(node->left, w); /* negate values (sparsity pattern set in wsum_hess_init_impl) */ - CSR_Matrix *child_hess = node->left->wsum_hess; - for (int k = 0; k < child_hess->nnz; k++) + for (int k = 0; k < node->left->wsum_hess->nnz; k++) { - node->wsum_hess->x[k] = -child_hess->x[k]; + node->wsum_hess->x[k] = -node->left->wsum_hess->x[k]; } } diff --git a/src/atoms/affine/parameter.c b/src/atoms/affine/parameter.c index 57458b0..0b9ffbe 100644 --- a/src/atoms/affine/parameter.c +++ b/src/atoms/affine/parameter.c @@ -17,6 +17,7 @@ */ #include "atoms/affine.h" #include "subexpr.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -32,7 +33,7 @@ static void forward(expr *node, const double *u) static void jacobian_init_impl(expr *node) { /* Zero jacobian: size x n_vars with 0 nonzeros. */ - node->jacobian = new_csr_matrix(node->size, node->n_vars, 0); + node->jacobian = new_sparse_matrix_alloc(node->size, node->n_vars, 0); } static void eval_jacobian(expr *node) @@ -43,7 +44,7 @@ static void eval_jacobian(expr *node) static void wsum_hess_init_impl(expr *node) { /* Zero Hessian: n_vars x n_vars with 0 nonzeros. 
*/ - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 0); + node->wsum_hess = new_sparse_matrix_alloc(node->n_vars, node->n_vars, 0); } static void eval_wsum_hess(expr *node, const double *w) diff --git a/src/atoms/affine/promote.c b/src/atoms/affine/promote.c index ffa21a6..1a031d8 100644 --- a/src/atoms/affine/promote.c +++ b/src/atoms/affine/promote.c @@ -40,46 +40,23 @@ static void jacobian_init_impl(expr *node) expr *x = node->left; jacobian_init(x); - /* each output row copies the single row from child's jacobian */ - int nnz = node->size * x->jacobian->nnz; - node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz); - - /* fill sparsity pattern */ - CSR_Matrix *J = node->jacobian; - J->nnz = 0; - for (int row = 0; row < node->size; row++) - { - J->p[row] = J->nnz; - memcpy(J->i + J->nnz, x->jacobian->i, x->jacobian->nnz * sizeof(int)); - J->nnz += x->jacobian->nnz; - } - assert(J->nnz == nnz); - J->p[node->size] = J->nnz; + /* allocate sparsity for an (node->size, n_vars) matrix whose rows are all + copies of the child's single row; output type matches child's type. */ + node->jacobian = x->jacobian->promote_alloc(x->jacobian, node->size); } static void eval_jacobian(expr *node) { node->left->eval_jacobian(node->left); - CSR_Matrix *child_jac = node->left->jacobian; - CSR_Matrix *jac = node->jacobian; - int child_nnz = child_jac->p[1] - child_jac->p[0]; - - /* Copy child's row values to each output row */ - for (int row = 0; row < node->size; row++) - { - memcpy(jac->x + row * child_nnz, child_jac->x + child_jac->p[0], - child_nnz * sizeof(double)); - } + /* tile the child's single row into the preallocated output. 
*/ + node->left->jacobian->promote_fill_values(node->left->jacobian, node->jacobian); } static void wsum_hess_init_impl(expr *node) { wsum_hess_init(node->left); - - /* same sparsity as child since we're summing weights */ - CSR_Matrix *child_hess = node->left->wsum_hess; - node->wsum_hess = new_csr_copy_sparsity(child_hess); + node->wsum_hess = node->left->wsum_hess->copy_sparsity(node->left->wsum_hess); } static void eval_wsum_hess(expr *node, const double *w) @@ -95,8 +72,8 @@ static void eval_wsum_hess(expr *node, const double *w) node->left->eval_wsum_hess(node->left, &sum_w); /* copy values */ - CSR_Matrix *child_hess = node->left->wsum_hess; - memcpy(node->wsum_hess->x, child_hess->x, child_hess->nnz * sizeof(double)); + memcpy(node->wsum_hess->x, node->left->wsum_hess->x, + node->left->wsum_hess->nnz * sizeof(double)); } static bool is_affine(const expr *node) diff --git a/src/atoms/affine/reshape.c b/src/atoms/affine/reshape.c index d654b17..e59a7ec 100644 --- a/src/atoms/affine/reshape.c +++ b/src/atoms/affine/reshape.c @@ -36,7 +36,7 @@ static void jacobian_init_impl(expr *node) { expr *x = node->left; jacobian_init(x); - node->jacobian = new_csr_copy_sparsity(x->jacobian); + node->jacobian = x->jacobian->copy_sparsity(x->jacobian); } static void eval_jacobian(expr *node) @@ -50,14 +50,15 @@ static void wsum_hess_init_impl(expr *node) { expr *x = node->left; wsum_hess_init(x); - node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); + node->wsum_hess = x->wsum_hess->copy_sparsity(x->wsum_hess); } static void eval_wsum_hess(expr *node, const double *w) { expr *x = node->left; x->eval_wsum_hess(x, w); - memcpy(node->wsum_hess->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double)); + memcpy(node->wsum_hess->x, x->wsum_hess->x, + node->wsum_hess->nnz * sizeof(double)); } static bool is_affine(const expr *node) diff --git a/src/atoms/affine/right_matmul.c b/src/atoms/affine/right_matmul.c index c3344f2..5e3b39f 100644 --- a/src/atoms/affine/right_matmul.c 
+++ b/src/atoms/affine/right_matmul.c @@ -17,11 +17,12 @@ */ #include "atoms/affine.h" #include "subexpr.h" -#include "utils/CSR_Matrix.h" -#include "utils/dense_matrix.h" +#include "utils/CSR_matrix.h" +#include "utils/mini_numpy.h" #include "utils/tracked_alloc.h" #include #include +#include /* This file implements the atom 'right_matmul' corresponding to the operation y = f(x) @ A, where A is a given matrix and f(x) is an arbitrary expression. @@ -29,12 +30,12 @@ transpose: f(x) @ A = (A^T @ f(x)^T)^T. For the parameter case: - - param_source stores A values in CSR data order + - param_source stores A values in CSR_matrix data order - inner left_matmul stores AT as its A-matrix and A as its AT-matrix - on refresh: update AT (inner's AT, the original A) from param_source, then recompute A^T (inner's A) from the updated A. */ -/* Refresh for sparse right_matmul: param stores A in CSR data order. +/* Refresh for sparse right_matmul: param stores A in CSR_matrix data order. Inner left_matmul: lnode->A = AT (transposed), lnode->AT = A (original). So: update lnode->AT from param values, then recompute lnode->A. */ static void refresh_dense_right(left_matmul_expr *lnode) @@ -44,21 +45,21 @@ static void refresh_dense_right(left_matmul_expr *lnode) Furthermore, lnode->param_source->value corresponds to the column-major version of A, which is BT (an m x n matrix) */ - Dense_Matrix *B = (Dense_Matrix *) lnode->AT; - Dense_Matrix *BT = (Dense_Matrix *) lnode->A; - int m = B->base.n; - int n = B->base.m; + matrix *B = lnode->AT; + matrix *BT = lnode->A; + int m = B->n; + int n = B->m; memcpy(BT->x, lnode->param_source->value, m * n * sizeof(double)); A_transpose(B->x, BT->x, m, n); } -expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A) +expr *new_right_matmul(expr *param_node, expr *u, const CSR_matrix *A) { /* We can express right matmul using left matmul and transpose: u @ A = (A^T @ u^T)^T. 
*/ int *work_transpose = (int *) SP_MALLOC(A->n * sizeof(int)); - CSR_Matrix *AT = transpose(A, work_transpose); + CSR_matrix *AT = transpose(A, work_transpose); expr *u_transpose = new_transpose(u); expr *left_matmul = new_left_matmul(NULL, u_transpose, AT); @@ -74,7 +75,7 @@ expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A) expr *node = new_transpose(left_matmul); - free_csr_matrix(AT); + free_CSR_matrix(AT); free(work_transpose); return node; } diff --git a/src/atoms/affine/scalar_mult.c b/src/atoms/affine/scalar_mult.c index e54142a..433f3e1 100644 --- a/src/atoms/affine/scalar_mult.c +++ b/src/atoms/affine/scalar_mult.c @@ -59,7 +59,7 @@ static void jacobian_init_impl(expr *node) jacobian_init(x); /* same sparsity as child */ - node->jacobian = new_csr_copy_sparsity(x->jacobian); + node->jacobian = x->jacobian->copy_sparsity(x->jacobian); } static void eval_jacobian(expr *node) @@ -85,7 +85,7 @@ static void wsum_hess_init_impl(expr *node) wsum_hess_init(x); /* same sparsity as child */ - node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); + node->wsum_hess = x->wsum_hess->copy_sparsity(x->wsum_hess); } static void eval_wsum_hess(expr *node, const double *w) diff --git a/src/atoms/affine/sum.c b/src/atoms/affine/sum.c index c43d2ab..74f720b 100644 --- a/src/atoms/affine/sum.c +++ b/src/atoms/affine/sum.c @@ -16,9 +16,11 @@ * limitations under the License. 
*/ #include "atoms/affine.h" +#include "subexpr.h" #include "utils/CSR_sum.h" #include "utils/int_double_pair.h" #include "utils/mini_numpy.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include "utils/utils.h" #include @@ -86,12 +88,12 @@ static void jacobian_init_impl(expr *node) /* initialize child's jacobian */ jacobian_init(x); + CSR_matrix *Jx = x->jacobian->to_csr(x->jacobian); /* we never have to store more than the child's nnz */ - node->jacobian = new_csr_matrix(node->size, node->n_vars, x->jacobian->nnz); - node->work->iwork = - SP_MALLOC(MAX(node->jacobian->n, x->jacobian->nnz) * sizeof(int)); - snode->idx_map = SP_MALLOC(x->jacobian->nnz * sizeof(int)); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, Jx->nnz); + node->work->iwork = SP_MALLOC(MAX(jac->n, Jx->nnz) * sizeof(int)); + snode->idx_map = SP_MALLOC(Jx->nnz * sizeof(int)); /* the idx_map array maps each nonzero entry j in x->jacobian to the corresponding index in the output row matrix C. 
Specifically, for @@ -100,68 +102,70 @@ static void jacobian_init_impl(expr *node) if (axis == -1) { - sum_all_rows_csr_alloc(x->jacobian, node->jacobian, node->work->iwork, - snode->idx_map); + sum_all_rows_csr_alloc(Jx, jac, node->work->iwork, snode->idx_map); } else if (axis == 0) { - sum_block_of_rows_csr_alloc(x->jacobian, node->jacobian, x->d1, - node->work->iwork, snode->idx_map); + sum_block_of_rows_csr_alloc(Jx, jac, x->d1, node->work->iwork, + snode->idx_map); } else if (axis == 1) { - sum_evenly_spaced_rows_csr_alloc(x->jacobian, node->jacobian, node->size, - node->work->iwork, snode->idx_map); + sum_evenly_spaced_rows_csr_alloc(Jx, jac, node->size, node->work->iwork, + snode->idx_map); } + + node->jacobian = new_sparse_matrix(jac); } static void eval_jacobian(expr *node) { - expr *x = node->left; + expr *child = node->left; /* evaluate child's jacobian */ - x->eval_jacobian(x); + child->eval_jacobian(child); /* we have precomputed an idx map between the nonzeros of the child's jacobian and this node's jacobian, so we just accumulate accordingly */ memset(node->jacobian->x, 0, node->jacobian->nnz * sizeof(double)); - accumulator(x->jacobian, ((sum_expr *) node)->idx_map, node->jacobian->x); + accumulator(child->jacobian->x, child->jacobian->nnz, + ((sum_expr *) node)->idx_map, node->jacobian->x); } static void wsum_hess_init_impl(expr *node) { - expr *x = node->left; + expr *child = node->left; /* initialize child's wsum_hess */ - wsum_hess_init(x); + wsum_hess_init(child); /* we never have to store more than the child's nnz */ - node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); - node->work->dwork = SP_MALLOC(x->size * sizeof(double)); + node->wsum_hess = child->wsum_hess->copy_sparsity(child->wsum_hess); + node->work->dwork = SP_MALLOC(child->size * sizeof(double)); } static void eval_wsum_hess(expr *node, const double *w) { - expr *x = node->left; + expr *child = node->left; sum_expr *snode = (sum_expr *) node; int axis = snode->axis; if (axis == 
-1) { - scaled_ones(node->work->dwork, x->size, *w); + scaled_ones(node->work->dwork, child->size, *w); } else if (axis == 0) { - repeat(node->work->dwork, w, x->d2, x->d1); + repeat(node->work->dwork, w, child->d2, child->d1); } else if (axis == 1) { - tile_double(node->work->dwork, w, x->d1, x->d2); + tile_double(node->work->dwork, w, child->d1, child->d2); } - x->eval_wsum_hess(x, node->work->dwork); + child->eval_wsum_hess(child, node->work->dwork); - /* copy values */ - memcpy(node->wsum_hess->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double)); + memcpy(node->wsum_hess->x, child->wsum_hess->x, + node->wsum_hess->nnz * sizeof(double)); } static bool is_affine(const expr *node) diff --git a/src/atoms/affine/trace.c b/src/atoms/affine/trace.c index c421b3f..846a45b 100644 --- a/src/atoms/affine/trace.c +++ b/src/atoms/affine/trace.c @@ -16,8 +16,10 @@ * limitations under the License. */ #include "atoms/affine.h" +#include "subexpr.h" #include "utils/CSR_sum.h" #include "utils/int_double_pair.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include "utils/utils.h" #include @@ -55,7 +57,7 @@ static void jacobian_init_impl(expr *node) // --------------------------------------------------------------- // count total nnz and allocate matrix with sufficient space // --------------------------------------------------------------- - const CSR_Matrix *A = x->jacobian; + const CSR_matrix *A = x->jacobian->to_csr(x->jacobian); int total_nnz = 0; int row_spacing = x->d1 + 1; @@ -64,22 +66,23 @@ static void jacobian_init_impl(expr *node) total_nnz += A->p[row + 1] - A->p[row]; } - node->jacobian = new_csr_matrix(1, node->n_vars, total_nnz); + CSR_matrix *jac = new_CSR_matrix(1, node->n_vars, total_nnz); // --------------------------------------------------------------- // fill sparsity pattern and idx_map // --------------------------------------------------------------- trace_expr *tnode = (trace_expr *) node; - node->work->iwork = 
SP_MALLOC(MAX(node->jacobian->n, total_nnz) * sizeof(int)); + node->work->iwork = SP_MALLOC(MAX(jac->n, total_nnz) * sizeof(int)); /* the idx_map array maps each nonzero entry j in the original matrix A (from the selected, evenly spaced rows) to the corresponding index in the output row matrix C. Specifically, for each nonzero entry j in A (from the selected rows), idx_map[j] gives the position in C->x where the value from A->x[j] should be accumulated. */ - tnode->idx_map = SP_MALLOC(x->jacobian->nnz * sizeof(int)); - sum_spaced_rows_into_row_csr_alloc(A, node->jacobian, row_spacing, - node->work->iwork, tnode->idx_map); + tnode->idx_map = SP_MALLOC(A->nnz * sizeof(int)); + sum_spaced_rows_into_row_csr_alloc(A, jac, row_spacing, node->work->iwork, + tnode->idx_map); + node->jacobian = new_sparse_matrix(jac); } static void eval_jacobian(expr *node) @@ -92,8 +95,8 @@ static void eval_jacobian(expr *node) /* local jacobian */ memset(node->jacobian->x, 0, node->jacobian->nnz * sizeof(double)); - accumulator_with_spacing(x->jacobian, tnode->idx_map, node->jacobian->x, - x->d1 + 1); + accumulator_with_spacing(x->jacobian->to_csr(x->jacobian), tnode->idx_map, + node->jacobian->x, x->d1 + 1); } /* Placeholders for Hessian-related functions */ @@ -110,7 +113,7 @@ static void wsum_hess_init_impl(expr *node) contribution to wsum_hess of entries of the child that will always have zero weight in eval_wsum_hess. We do this for simplicity. But the Hessian can for sure be made more sophisticated. 
*/ - node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); + node->wsum_hess = x->wsum_hess->copy_sparsity(x->wsum_hess); } static void eval_wsum_hess(expr *node, const double *w) @@ -125,7 +128,8 @@ static void eval_wsum_hess(expr *node, const double *w) x->eval_wsum_hess(x, node->work->dwork); - memcpy(node->wsum_hess->x, x->wsum_hess->x, sizeof(double) * x->wsum_hess->nnz); + memcpy(node->wsum_hess->x, x->wsum_hess->x, + node->wsum_hess->nnz * sizeof(double)); } static bool is_affine(const expr *node) diff --git a/src/atoms/affine/transpose.c b/src/atoms/affine/transpose.c index d4a7998..59c961e 100644 --- a/src/atoms/affine/transpose.c +++ b/src/atoms/affine/transpose.c @@ -16,8 +16,8 @@ * limitations under the License. */ #include "atoms/affine.h" +#include "utils/mini_numpy.h" #include "utils/tracked_alloc.h" -#include #include #include @@ -30,60 +30,38 @@ static void forward(expr *node, const double *u) /* local forward pass */ int d1 = node->d1; int d2 = node->d2; - double *X = node->left->value; - double *XT = node->value; - for (int i = 0; i < d1; ++i) - { - for (int j = 0; j < d2; ++j) - { - XT[j * d1 + i] = X[i * d2 + j]; - } - } + A_transpose(node->value, node->left->value, d1, d2); } static void jacobian_init_impl(expr *node) { expr *child = node->left; jacobian_init(child); - CSR_Matrix *Jc = child->jacobian; - node->jacobian = new_csr_matrix(node->size, node->n_vars, Jc->nnz); - /* fill sparsity */ - CSR_Matrix *J = node->jacobian; + int n_out = node->size; int d1 = node->d1; int d2 = node->d2; - int nnz = 0; - J->p[0] = 0; - /* 'k' is the old row that gets swapped to 'row'*/ - int k, len; - for (int row = 0; row < J->m; ++row) + /* The transpose's Jacobian is a row permutation of the child's: + J_node[r, :] = J_child[k(r), :] where k(r) = (r/d1) + (r%d1)*d2. 
*/ + int *indices = (int *) SP_MALLOC(n_out * sizeof(int)); + for (int r = 0; r < n_out; r++) { - k = (row / d1) + (row % d1) * d2; - len = Jc->p[k + 1] - Jc->p[k]; - memcpy(J->i + nnz, Jc->i + Jc->p[k], len * sizeof(int)); - nnz += len; - J->p[row + 1] = nnz; + indices[r] = (r / d1) + (r % d1) * d2; } + + node->jacobian = child->jacobian->index_alloc(child->jacobian, indices, n_out); + + /* save indices for eval_jacobian */ + node->work->iwork = indices; } static void eval_jacobian(expr *node) { expr *child = node->left; child->eval_jacobian(child); - CSR_Matrix *Jc = child->jacobian; - CSR_Matrix *J = node->jacobian; - - int d1 = node->d1; - int d2 = node->d2; - int nnz = 0; - for (int row = 0; row < J->m; ++row) - { - int k = (row / d1) + (row % d1) * d2; - int len = Jc->p[k + 1] - Jc->p[k]; - memcpy(J->x + nnz, Jc->x + Jc->p[k], len * sizeof(double)); - nnz += len; - } + child->jacobian->index_fill_values(child->jacobian, node->work->iwork, + node->size, node->jacobian); } static void wsum_hess_init_impl(expr *node) @@ -93,7 +71,7 @@ static void wsum_hess_init_impl(expr *node) wsum_hess_init(x); /* same sparsity pattern as child */ - node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); + node->wsum_hess = x->wsum_hess->copy_sparsity(x->wsum_hess); /* for computing Kw where K is the commutation matrix */ node->work->dwork = (double *) SP_MALLOC(node->size * sizeof(double)); @@ -102,7 +80,6 @@ static void eval_wsum_hess(expr *node, const double *w) { int d2 = node->d2; int d1 = node->d1; - // TODO: meaybe more efficient to do this with memcpy first /* evaluate hessian of child at Kw */ for (int i = 0; i < d2; ++i) diff --git a/src/atoms/affine/upper_tri.c b/src/atoms/affine/upper_tri.c index 51c339b..2da4d36 100644 --- a/src/atoms/affine/upper_tri.c +++ b/src/atoms/affine/upper_tri.c @@ -40,7 +40,7 @@ expr *new_upper_tri(expr *child) int *indices = NULL; if (n_elems > 0) { - indices = (int *) malloc((size_t) n_elems * sizeof(int)); + indices = (int *) 
malloc(n_elems * sizeof(int)); int k = 0; for (int i = 0; i < n; i++) { diff --git a/src/atoms/affine/variable.c b/src/atoms/affine/variable.c index 0dc12d2..fe010e7 100644 --- a/src/atoms/affine/variable.c +++ b/src/atoms/affine/variable.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "atoms/affine.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -27,14 +28,15 @@ static void forward(expr *node, const double *u) static void jacobian_init_impl(expr *node) { - node->jacobian = new_csr_matrix(node->size, node->n_vars, node->size); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, node->size); for (int j = 0; j < node->size; j++) { - node->jacobian->p[j] = j; - node->jacobian->i[j] = j + node->var_id; - node->jacobian->x[j] = 1.0; + jac->p[j] = j; + jac->i[j] = j + node->var_id; + jac->x[j] = 1.0; } - node->jacobian->p[node->size] = node->size; + jac->p[node->size] = node->size; + node->jacobian = new_sparse_matrix(jac); } static void eval_jacobian(expr *node) @@ -46,7 +48,7 @@ static void eval_jacobian(expr *node) static void wsum_hess_init_impl(expr *node) { /* Variables have zero Hessian */ - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 0); + node->wsum_hess = new_sparse_matrix_alloc(node->n_vars, node->n_vars, 0); } static void wsum_hess_eval(expr *node, const double *w) diff --git a/src/atoms/affine/vector_mult.c b/src/atoms/affine/vector_mult.c index 3647f8b..ee7992b 100644 --- a/src/atoms/affine/vector_mult.c +++ b/src/atoms/affine/vector_mult.c @@ -59,7 +59,7 @@ static void jacobian_init_impl(expr *node) jacobian_init(x); /* same sparsity as child */ - node->jacobian = new_csr_copy_sparsity(x->jacobian); + node->jacobian = x->jacobian->copy_sparsity(x->jacobian); } static void eval_jacobian(expr *node) @@ -67,17 +67,11 @@ static void eval_jacobian(expr *node) expr *x = node->left; const double *a = ((vector_mult_expr *) node)->param_source->value; - /* evaluate x */ + /* 
evaluate jacobian of child */ x->eval_jacobian(x); - /* row-wise scale child's jacobian */ - for (int i = 0; i < node->size; i++) - { - for (int j = x->jacobian->p[i]; j < x->jacobian->p[i + 1]; j++) - { - node->jacobian->x[j] = a[i] * x->jacobian->x[j]; - } - } + /* row-wise scale child's jacobian: diag(a) @ Jx */ + x->jacobian->DA_fill_values(a, x->jacobian, node->jacobian); } static void wsum_hess_init_impl(expr *node) @@ -88,8 +82,9 @@ static void wsum_hess_init_impl(expr *node) wsum_hess_init(x); /* same sparsity as child */ - node->wsum_hess = new_csr_copy_sparsity(x->wsum_hess); + node->wsum_hess = x->wsum_hess->copy_sparsity(x->wsum_hess); + /* workspace for storing scaled weights */ node->work->dwork = (double *) SP_MALLOC(node->size * sizeof(double)); } @@ -107,7 +102,8 @@ static void eval_wsum_hess(expr *node, const double *w) x->eval_wsum_hess(x, node->work->dwork); /* copy values from child to this node */ - memcpy(node->wsum_hess->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double)); + memcpy(node->wsum_hess->x, x->wsum_hess->x, + node->wsum_hess->nnz * sizeof(double)); } static void free_type_data(expr *node) diff --git a/src/atoms/bivariate_full_dom/matmul.c b/src/atoms/bivariate_full_dom/matmul.c index 5fb8af6..e3ef874 100644 --- a/src/atoms/bivariate_full_dom/matmul.c +++ b/src/atoms/bivariate_full_dom/matmul.c @@ -17,12 +17,13 @@ */ #include "atoms/bivariate_full_dom.h" #include "subexpr.h" -#include "utils/CSC_Matrix.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" #include "utils/CSR_sum.h" #include "utils/linalg_dense_sparse_matmuls.h" #include "utils/linalg_sparse_matmuls.h" #include "utils/mini_numpy.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include "utils/utils.h" #include @@ -41,10 +42,10 @@ // column positions (offset by j in the Y-variable indexing). 
// ------------------------------------------------------------------------------ -static CSR_Matrix *build_cross_hessian_sparsity(int m, int k, int n) +static CSR_matrix *build_cross_hessian_sparsity(int m, int k, int n) { int total_nnz = m * k * n; - CSR_Matrix *B = new_csr_matrix(m * k, k * n, total_nnz); + CSR_matrix *B = new_CSR_matrix(m * k, k * n, total_nnz); int idx = 0; for (int j = 0; j < k; j++) @@ -64,7 +65,7 @@ static CSR_Matrix *build_cross_hessian_sparsity(int m, int k, int n) } static void fill_cross_hessian_values(int m, int k, int n, const double *w, - CSR_Matrix *B) + CSR_matrix *B) { int idx = 0; for (int j = 0; j < k; j++) @@ -102,15 +103,15 @@ static void free_matmul_data(expr *node) { matmul_expr *mnode = (matmul_expr *) node; /* Jacobian workspace */ - free_csr_matrix(mnode->term1_CSR); - free_csr_matrix(mnode->term2_CSR); + free_CSR_matrix(mnode->term1_CSR); + free_CSR_matrix(mnode->term2_CSR); /* Hessian workspace */ - free_csr_matrix(mnode->B); - free_csr_matrix(mnode->BJg); - free_csc_matrix(mnode->BJg_CSC); + free_CSR_matrix(mnode->B); + free_CSR_matrix(mnode->BJg); + free_CSC_matrix(mnode->BJg_CSC); free(mnode->BJg_csc_work); - free_csr_matrix(mnode->C); - free_csr_matrix(mnode->CT); + free_CSR_matrix(mnode->C); + free_CSR_matrix(mnode->CT); free(mnode->idx_map_C); free(mnode->idx_map_CT); free(mnode->idx_map_Hf); @@ -139,7 +140,7 @@ static void jacobian_init_no_chain_rule(expr *node) int k = x->d2; int n = y->d2; int nnz = m * n * 2 * k; - node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, nnz); int nnz_idx = 0; for (int i = 0; i < node->size; i++) @@ -147,33 +148,34 @@ static void jacobian_init_no_chain_rule(expr *node) int row = i % m; int col = i / m; - node->jacobian->p[i] = nnz_idx; + jac->p[i] = nnz_idx; if (x->var_id < y->var_id) { for (int j = 0; j < k; j++) { - node->jacobian->i[nnz_idx++] = x->var_id + row + j * m; + jac->i[nnz_idx++] = x->var_id + row 
+ j * m; } for (int j = 0; j < k; j++) { - node->jacobian->i[nnz_idx++] = y->var_id + col * k + j; + jac->i[nnz_idx++] = y->var_id + col * k + j; } } else { for (int j = 0; j < k; j++) { - node->jacobian->i[nnz_idx++] = y->var_id + col * k + j; + jac->i[nnz_idx++] = y->var_id + col * k + j; } for (int j = 0; j < k; j++) { - node->jacobian->i[nnz_idx++] = x->var_id + row + j * m; + jac->i[nnz_idx++] = x->var_id + row + j * m; } } } - node->jacobian->p[node->size] = nnz_idx; + jac->p[node->size] = nnz_idx; assert(nnz_idx == nnz); + node->jacobian = new_sparse_matrix(jac); } static void eval_jacobian_no_chain_rule(expr *node) @@ -182,13 +184,14 @@ static void eval_jacobian_no_chain_rule(expr *node) expr *y = node->right; int m = x->d1; int k = x->d2; - double *Jx = node->jacobian->x; + CSR_matrix *jac = node->jacobian->to_csr(node->jacobian); + double *Jx = jac->x; for (int i = 0; i < node->size; i++) { int row = i % m; int col = i / m; - int pos = node->jacobian->p[i]; + int pos = jac->p[i]; if (x->var_id < y->var_id) { @@ -234,8 +237,9 @@ static void jacobian_init_chain_rule(expr *node) mnode->term1_CSR = YT_kron_I_alloc(m, k, n, f->work->jacobian_csc); mnode->term2_CSR = I_kron_X_alloc(m, k, n, g->work->jacobian_csc); int max_nnz = mnode->term1_CSR->nnz + mnode->term2_CSR->nnz; - node->jacobian = new_csr_matrix(node->size, node->n_vars, max_nnz); - sum_csr_alloc(mnode->term1_CSR, mnode->term2_CSR, node->jacobian); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, max_nnz); + sum_csr_alloc(mnode->term1_CSR, mnode->term2_CSR, jac); + node->jacobian = new_sparse_matrix(jac); } static void eval_jacobian_chain_rule(expr *node) @@ -250,14 +254,16 @@ static void eval_jacobian_chain_rule(expr *node) /* evaluate Jacobians of children */ f->eval_jacobian(f); g->eval_jacobian(g); - csr_to_csc_fill_values(f->jacobian, f->work->jacobian_csc, f->work->csc_work); - csr_to_csc_fill_values(g->jacobian, g->work->jacobian_csc, g->work->csc_work); + 
csr_to_csc_fill_values(f->jacobian->to_csr(f->jacobian), f->work->jacobian_csc, + f->work->csc_work); + csr_to_csc_fill_values(g->jacobian->to_csr(g->jacobian), g->work->jacobian_csc, + g->work->csc_work); /* evaluate term1, term2, and their sum */ YT_kron_I_fill_values(m, k, n, g->value, f->work->jacobian_csc, mnode->term1_CSR); I_kron_X_fill_values(m, k, n, f->value, g->work->jacobian_csc, mnode->term2_CSR); - sum_csr_fill_values(mnode->term1_CSR, mnode->term2_CSR, node->jacobian); + sum_csr_fill_values(mnode->term1_CSR, mnode->term2_CSR, node->jacobian->to_csr(node->jacobian)); } // ------------------------------------------------------------------------------------ @@ -272,10 +278,10 @@ static void wsum_hess_init_no_chain_rule(expr *node) int k = x->d2; int n = y->d2; int total_nnz = 2 * m * k * n; - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, total_nnz); + CSR_matrix *hess = new_CSR_matrix(node->n_vars, node->n_vars, total_nnz); int nnz = 0; - int *Hi = node->wsum_hess->i; - int *Hp = node->wsum_hess->p; + int *Hi = hess->i; + int *Hp = hess->p; int start, i; if (x->var_id < y->var_id) @@ -338,6 +344,7 @@ static void wsum_hess_init_no_chain_rule(expr *node) } Hp[node->n_vars] = nnz; assert(nnz == total_nnz); + node->wsum_hess = new_sparse_matrix(hess); } static void eval_wsum_hess_no_chain_rule(expr *node, const double *w) @@ -414,8 +421,8 @@ static void wsum_hess_init_chain_rule(expr *node) int m = f->d1; int k = f->d2; int n = g->d2; - CSC_Matrix *Jf = f->work->jacobian_csc; - CSC_Matrix *Jg = g->work->jacobian_csc; + CSC_matrix *Jf = f->work->jacobian_csc; + CSC_matrix *Jg = g->work->jacobian_csc; /* initialize C = Jf^T @ B @ Jg = Jf^T @ (B @ Jg) */ mnode->B = build_cross_hessian_sparsity(m, k, n); @@ -435,8 +442,10 @@ static void wsum_hess_init_chain_rule(expr *node) /* sum the four terms and fill idx maps */ int *maps[4]; - node->wsum_hess = - sum_4_csr_alloc(mnode->C, mnode->CT, f->wsum_hess, g->wsum_hess, maps); + CSR_matrix *hess = + 
sum_4_csr_alloc(mnode->C, mnode->CT, f->wsum_hess->to_csr(f->wsum_hess), + g->wsum_hess->to_csr(g->wsum_hess), maps); + node->wsum_hess = new_sparse_matrix(hess); mnode->idx_map_C = maps[0]; mnode->idx_map_CT = maps[1]; mnode->idx_map_Hf = maps[2]; @@ -460,23 +469,23 @@ static void eval_wsum_hess_chain_rule(expr *node, const double *w) int n = g->d2; bool is_f_affine = f->is_affine(f); bool is_g_affine = g->is_affine(g); - CSC_Matrix *Jf = f->work->jacobian_csc; - CSC_Matrix *Jg = g->work->jacobian_csc; + CSC_matrix *Jf = f->work->jacobian_csc; + CSC_matrix *Jg = g->work->jacobian_csc; - /* refresh child Jacobian CSC values (cache if affine) */ + /* refresh child Jacobian CSC_matrix values (cache if affine) */ if (!f->work->jacobian_csc_filled) { - csr_to_csc_fill_values(f->jacobian, Jf, f->work->csc_work); + csr_to_csc_fill_values(f->jacobian->to_csr(f->jacobian), Jf, f->work->csc_work); if (is_f_affine) { f->work->jacobian_csc_filled = true; } } - /* refresh child Jacobian CSC values (cache if affine) */ + /* refresh child Jacobian CSC_matrix values (cache if affine) */ if (!g->work->jacobian_csc_filled) { - csr_to_csc_fill_values(g->jacobian, Jg, g->work->csc_work); + csr_to_csc_fill_values(g->jacobian->to_csr(g->jacobian), Jg, g->work->csc_work); if (is_g_affine) { g->work->jacobian_csc_filled = true; @@ -508,10 +517,12 @@ static void eval_wsum_hess_chain_rule(expr *node, const double *w) /* accumulate H = C + C^T + H_f + H_g */ memset(node->wsum_hess->x, 0, node->wsum_hess->nnz * sizeof(double)); - accumulator(mnode->C, mnode->idx_map_C, node->wsum_hess->x); - accumulator(mnode->CT, mnode->idx_map_CT, node->wsum_hess->x); - accumulator(f->wsum_hess, mnode->idx_map_Hf, node->wsum_hess->x); - accumulator(g->wsum_hess, mnode->idx_map_Hg, node->wsum_hess->x); + accumulator(mnode->C->x, mnode->C->nnz, mnode->idx_map_C, node->wsum_hess->x); + accumulator(mnode->CT->x, mnode->CT->nnz, mnode->idx_map_CT, node->wsum_hess->x); + accumulator(f->wsum_hess->x, 
f->wsum_hess->nnz, mnode->idx_map_Hf, + node->wsum_hess->x); + accumulator(g->wsum_hess->x, g->wsum_hess->nnz, mnode->idx_map_Hg, + node->wsum_hess->x); } expr *new_matmul(expr *x, expr *y) diff --git a/src/atoms/bivariate_full_dom/multiply.c b/src/atoms/bivariate_full_dom/multiply.c index 5f606f8..7ae7841 100644 --- a/src/atoms/bivariate_full_dom/multiply.c +++ b/src/atoms/bivariate_full_dom/multiply.c @@ -17,7 +17,11 @@ */ #include "atoms/bivariate_full_dom.h" #include "subexpr.h" +#include "utils/CSR_matrix.h" #include "utils/CSR_sum.h" +#include "utils/matrix_BTA.h" +#include "utils/matrix_sum.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -50,10 +54,10 @@ static void jacobian_init_impl(expr *node) jacobian_init(node->left); jacobian_init(node->right); int nnz_max = node->left->jacobian->nnz + node->right->jacobian->nnz; - node->jacobian = new_csr_matrix(node->size, node->n_vars, nnz_max); + node->jacobian = new_sparse_matrix_alloc(node->size, node->n_vars, nnz_max); /* fill sparsity pattern */ - sum_csr_alloc(node->left->jacobian, node->right->jacobian, node->jacobian); + sum_matrices_alloc(node->left->jacobian, node->right->jacobian, node->jacobian); } static void eval_jacobian(expr *node) @@ -66,8 +70,8 @@ static void eval_jacobian(expr *node) /* chain rule: the jacobian of h(x) = f(g1(x), g2(x))) is Jh = J_{f, 1} J_{g1} + * J_{f, 2} J_{g2} */ - sum_scaled_csr_matrices_fill_values(x->jacobian, y->jacobian, node->jacobian, - y->value, x->value); + sum_scaled_matrices_fill_values(x->jacobian, y->jacobian, node->jacobian, + y->value, x->value); } static void wsum_hess_init_impl(expr *node) @@ -80,7 +84,8 @@ static void wsum_hess_init_impl(expr *node) x->var_id != y->var_id) { assert(y->var_id != NOT_A_VARIABLE); - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 2 * node->size); + CSR_matrix *hess = + new_CSR_matrix(node->n_vars, node->n_vars, 2 * node->size); int i, var1_id, var2_id; @@ -98,8 +103,8 @@ 
static void wsum_hess_init_impl(expr *node) /* var1 rows of Hessian */ for (i = 0; i < node->size; i++) { - node->wsum_hess->p[var1_id + i] = i; - node->wsum_hess->i[i] = var2_id + i; + hess->p[var1_id + i] = i; + hess->i[i] = var2_id + i; } int nnz = node->size; @@ -107,22 +112,23 @@ static void wsum_hess_init_impl(expr *node) /* rows between var1 and var2 */ for (i = var1_id + node->size; i < var2_id; i++) { - node->wsum_hess->p[i] = nnz; + hess->p[i] = nnz; } /* var2 rows of Hessian */ for (i = 0; i < node->size; i++) { - node->wsum_hess->p[var2_id + i] = nnz + i; - node->wsum_hess->i[nnz + i] = var1_id + i; + hess->p[var2_id + i] = nnz + i; + hess->i[nnz + i] = var1_id + i; } /* remaining rows */ nnz += node->size; for (i = var2_id + node->size; i <= node->n_vars; i++) { - node->wsum_hess->p[i] = nnz; + hess->p[i] = nnz; } + node->wsum_hess = new_sparse_matrix(hess); } else { @@ -145,30 +151,37 @@ static void wsum_hess_init_impl(expr *node) node->work->dwork = (double *) SP_MALLOC(node->size * sizeof(double)); } - /* prepare sparsity pattern of csc conversion */ - jacobian_csc_init(x); - jacobian_csc_init(y); - CSC_Matrix *Jg1 = x->work->jacobian_csc; - CSC_Matrix *Jg2 = y->work->jacobian_csc; + /* For sparse matrices we need the CSC cache to be valid for the + BTA_matrices_alloc / BTDA_matrices_fill_values calls below. 
*/ + if (!x->jacobian->is_permuted_dense) + { + sparse_matrix_ensure_csc_cache((sparse_matrix *) x->jacobian); + } + if (!y->jacobian->is_permuted_dense) + { + sparse_matrix_ensure_csc_cache((sparse_matrix *) y->jacobian); + } /* compute sparsity of C and prepare CT */ - CSR_Matrix *C = BTA_alloc(Jg1, Jg2); - node->work->iwork = (int *) SP_MALLOC(C->m * sizeof(int)); - CSR_Matrix *CT = AT_alloc(C, node->work->iwork); + matrix *C = BTA_matrices_alloc(x->jacobian, y->jacobian); + matrix *CT = C->transpose_alloc(C); /* initialize wsum_hessians of children */ wsum_hess_init(x); wsum_hess_init(y); elementwise_mult_expr *mul_node = (elementwise_mult_expr *) node; - mul_node->CSR_work1 = C; - mul_node->CSR_work2 = CT; + mul_node->C = C; + mul_node->CT = CT; /* compute sparsity pattern of H = C + C^T + term2 + term3 (we also fill index maps telling us where to accumulate each element of each matrix in the sum) */ int *maps[4]; - node->wsum_hess = sum_4_csr_alloc(C, CT, x->wsum_hess, y->wsum_hess, maps); + CSR_matrix *hess = sum_4_csr_alloc(C->to_csr(C), CT->to_csr(CT), + x->wsum_hess->to_csr(x->wsum_hess), + y->wsum_hess->to_csr(y->wsum_hess), maps); + node->wsum_hess = new_sparse_matrix(hess); mul_node->idx_map_C = maps[0]; mul_node->idx_map_CT = maps[1]; mul_node->idx_map_Hx = maps[2]; @@ -193,44 +206,34 @@ static void eval_wsum_hess(expr *node, const double *w) bool is_x_affine = x->is_affine(x); bool is_y_affine = y->is_affine(y); // ---------------------------------------------------------------------- - // convert Jacobians of children to CSC format - // (we only need to do this once if the child is affine) - // TODO: what if we have parameters? Should we set jacobian_csc_filled - // to false whenever parameters change value? + // Refresh each operand's CSC_matrix cache as needed for the (Sparse, + // Sparse) dispatch path. For PD operands, refresh_csc_values is a no-op. 
+ // The jacobian_csc_filled flag preserves the affine optimization: we only + // refresh on the first eval for affine children. // ---------------------------------------------------------------------- if (!x->work->jacobian_csc_filled) { - csr_to_csc_fill_values(x->jacobian, x->work->jacobian_csc, - x->work->csc_work); - + x->jacobian->refresh_csc_values(x->jacobian); if (is_x_affine) { x->work->jacobian_csc_filled = true; } } - if (!y->work->jacobian_csc_filled) { - csr_to_csc_fill_values(y->jacobian, y->work->jacobian_csc, - y->work->csc_work); - + y->jacobian->refresh_csc_values(y->jacobian); if (is_y_affine) { y->work->jacobian_csc_filled = true; } } - CSC_Matrix *Jg1 = x->work->jacobian_csc; - CSC_Matrix *Jg2 = y->work->jacobian_csc; - // --------------------------------------------------------------- // compute C and CT // --------------------------------------------------------------- elementwise_mult_expr *mul_node = (elementwise_mult_expr *) node; - CSR_Matrix *C = mul_node->CSR_work1; - CSR_Matrix *CT = mul_node->CSR_work2; - BTDA_fill_values(Jg1, Jg2, w, C); - AT_fill_values(C, CT, node->work->iwork); + BTDA_matrices_fill_values(x->jacobian, w, y->jacobian, mul_node->C); + mul_node->C->transpose_fill_values(mul_node->C, mul_node->CT); // --------------------------------------------------------------- // compute term2 and term 3 @@ -257,18 +260,22 @@ static void eval_wsum_hess(expr *node, const double *w) // compute H = C + C^T + term2 + term3 // --------------------------------------------------------------- memset(node->wsum_hess->x, 0, node->wsum_hess->nnz * sizeof(double)); - accumulator(C, mul_node->idx_map_C, node->wsum_hess->x); - accumulator(CT, mul_node->idx_map_CT, node->wsum_hess->x); - accumulator(x->wsum_hess, mul_node->idx_map_Hx, node->wsum_hess->x); - accumulator(y->wsum_hess, mul_node->idx_map_Hy, node->wsum_hess->x); + accumulator(mul_node->C->x, mul_node->C->nnz, mul_node->idx_map_C, + node->wsum_hess->x); + 
accumulator(mul_node->CT->x, mul_node->CT->nnz, mul_node->idx_map_CT, + node->wsum_hess->x); + accumulator(x->wsum_hess->x, x->wsum_hess->nnz, mul_node->idx_map_Hx, + node->wsum_hess->x); + accumulator(y->wsum_hess->x, y->wsum_hess->nnz, mul_node->idx_map_Hy, + node->wsum_hess->x); } } static void free_type_data(expr *node) { elementwise_mult_expr *mul_node = (elementwise_mult_expr *) node; - free_csr_matrix(mul_node->CSR_work1); - free_csr_matrix(mul_node->CSR_work2); + free_matrix(mul_node->C); + free_matrix(mul_node->CT); free(mul_node->idx_map_C); free(mul_node->idx_map_CT); free(mul_node->idx_map_Hx); diff --git a/src/atoms/bivariate_restricted_dom/quad_over_lin.c b/src/atoms/bivariate_restricted_dom/quad_over_lin.c index ecf2741..12be5a1 100644 --- a/src/atoms/bivariate_restricted_dom/quad_over_lin.c +++ b/src/atoms/bivariate_restricted_dom/quad_over_lin.c @@ -17,7 +17,8 @@ */ #include "atoms/bivariate_restricted_dom.h" #include "subexpr.h" -#include "utils/CSC_Matrix.h" +#include "utils/CSC_matrix.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -58,27 +59,28 @@ static void jacobian_init_impl(expr *node) /* if left node is a variable */ if (x->var_id != NOT_A_VARIABLE) { - node->jacobian = new_csr_matrix(1, node->n_vars, x->size + 1); - node->jacobian->p[0] = 0; - node->jacobian->p[1] = x->size + 1; + CSR_matrix *jac = new_CSR_matrix(1, node->n_vars, x->size + 1); + jac->p[0] = 0; + jac->p[1] = x->size + 1; /* if x has lower idx than y*/ if (x->var_id < y->var_id) { for (int j = 0; j < x->size; j++) { - node->jacobian->i[j] = x->var_id + j; + jac->i[j] = x->var_id + j; } - node->jacobian->i[x->size] = y->var_id; + jac->i[x->size] = y->var_id; } else /* y has lower idx than x */ { - node->jacobian->i[0] = y->var_id; + jac->i[0] = y->var_id; for (int j = 0; j < x->size; j++) { - node->jacobian->i[j + 1] = x->var_id + j; + jac->i[j + 1] = x->var_id + j; } } + node->jacobian = new_sparse_matrix(jac); } else /* left node 
is not a variable (guaranteed to be a linear operator) */ { @@ -87,46 +89,48 @@ static void jacobian_init_impl(expr *node) /* compute required allocation and allocate jacobian */ bool *col_nz = (bool *) SP_CALLOC( node->n_vars, sizeof(bool)); /* TODO: could use iwork here instead*/ - int nonzero_cols = count_nonzero_cols(x->jacobian, col_nz); - node->jacobian = new_csr_matrix(1, node->n_vars, nonzero_cols + 1); + CSR_matrix *Jx = x->jacobian->to_csr(x->jacobian); + int nonzero_cols = count_nonzero_cols(Jx, col_nz); + CSR_matrix *jac = new_CSR_matrix(1, node->n_vars, nonzero_cols + 1); /* precompute column indices */ - node->jacobian->nnz = 0; + jac->nnz = 0; for (int j = 0; j < node->n_vars; j++) { if (col_nz[j]) { - node->jacobian->i[node->jacobian->nnz] = j; - node->jacobian->nnz++; + jac->i[jac->nnz] = j; + jac->nnz++; } } - assert(nonzero_cols == node->jacobian->nnz); + assert(nonzero_cols == jac->nnz); free(col_nz); /* insert y variable index at correct position */ - insert_idx(y->var_id, node->jacobian->i, node->jacobian->nnz); - node->jacobian->nnz += 1; - node->jacobian->p[0] = 0; - node->jacobian->p[1] = node->jacobian->nnz; + insert_idx(y->var_id, jac->i, jac->nnz); + jac->nnz += 1; + jac->p[0] = 0; + jac->p[1] = jac->nnz; /* find position where y should be inserted */ node->work->iwork = (int *) SP_MALLOC(sizeof(int)); - for (int j = 0; j < node->jacobian->nnz; j++) + for (int j = 0; j < jac->nnz; j++) { - if (node->jacobian->i[j] == y->var_id) + if (jac->i[j] == y->var_id) { node->work->iwork[0] = j; break; } } - /* prepare CSC form of child jacobian for chain rule. + node->jacobian = new_sparse_matrix(jac); + + /* prepare CSC_matrix form of child jacobian for chain rule. * For a linear operator the values are constant, so fill * them once here. 
*/ jacobian_csc_init(x); - csr_to_csc_fill_values(x->jacobian, x->work->jacobian_csc, - x->work->csc_work); + csr_to_csc_fill_values(Jx, x->work->jacobian_csc, x->work->csc_work); } } @@ -134,6 +138,7 @@ static void eval_jacobian(expr *node) { expr *x = node->left; expr *y = node->right; + CSR_matrix *jac = node->jacobian->to_csr(node->jacobian); /* if x is a variable */ if (x->var_id != NOT_A_VARIABLE) @@ -143,16 +148,16 @@ static void eval_jacobian(expr *node) { for (int j = 0; j < x->size; j++) { - node->jacobian->x[j] = (2.0 * x->value[j]) / y->value[0]; + jac->x[j] = (2.0 * x->value[j]) / y->value[0]; } - node->jacobian->x[x->size] = -node->value[0] / y->value[0]; + jac->x[x->size] = -node->value[0] / y->value[0]; } else /* y has lower idx than x */ { - node->jacobian->x[0] = -node->value[0] / y->value[0]; + jac->x[0] = -node->value[0] / y->value[0]; for (int j = 0; j < x->size; j++) { - node->jacobian->x[j + 1] = (2.0 * x->value[j]) / y->value[0]; + jac->x[j + 1] = (2.0 * x->value[j]) / y->value[0]; } } } @@ -164,13 +169,13 @@ static void eval_jacobian(expr *node) node->work->dwork[j] = (2.0 * x->value[j]) / y->value[0]; } - /* chain rule (no derivative wrt y) using CSC format */ - yTA_fill_values(x->work->jacobian_csc, node->work->dwork, node->jacobian); + /* chain rule (no derivative wrt y) using CSC_matrix format */ + yTA_fill_values(x->work->jacobian_csc, node->work->dwork, jac); /* insert derivative wrt y at right place (for correctness this assumes that y does not appear in the numerator, but this will always be the case since y is a new variable for the denominator */ - node->jacobian->x[node->work->iwork[0]] = -node->value[0] / y->value[0]; + jac->x[node->work->iwork[0]] = -node->value[0] / y->value[0]; } } @@ -184,9 +189,8 @@ static void wsum_hess_init_impl(expr *node) /* if left node is a variable */ if (x->var_id != NOT_A_VARIABLE) { - node->wsum_hess = - new_csr_matrix(node->n_vars, node->n_vars, 3 * x->size + 1); - CSR_Matrix *H = 
node->wsum_hess; + CSR_matrix *H = new_CSR_matrix(node->n_vars, node->n_vars, 3 * x->size + 1); + node->wsum_hess = new_sparse_matrix(H); /* if x has lower idx than y*/ if (var_id_x < var_id_y) diff --git a/src/atoms/bivariate_restricted_dom/rel_entr.c b/src/atoms/bivariate_restricted_dom/rel_entr.c index d2ab721..ecd4520 100644 --- a/src/atoms/bivariate_restricted_dom/rel_entr.c +++ b/src/atoms/bivariate_restricted_dom/rel_entr.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "atoms/bivariate_restricted_dom.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -45,7 +46,7 @@ static void forward_vector_args(expr *node, const double *u) static void jacobian_init_vectors_args(expr *node) { - node->jacobian = new_csr_matrix(node->size, node->n_vars, 2 * node->size); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, 2 * node->size); expr *x = node->left; expr *y = node->right; @@ -57,29 +58,29 @@ static void jacobian_init_vectors_args(expr *node) { for (int j = 0; j < node->size; j++) { - node->jacobian->i[2 * j] = j + x->var_id; - node->jacobian->i[2 * j + 1] = j + y->var_id; - node->jacobian->p[j] = 2 * j; + jac->i[2 * j] = j + x->var_id; + jac->i[2 * j + 1] = j + y->var_id; + jac->p[j] = 2 * j; } } else { for (int j = 0; j < node->size; j++) { - node->jacobian->i[2 * j] = j + y->var_id; - node->jacobian->i[2 * j + 1] = j + x->var_id; - node->jacobian->p[j] = 2 * j; + jac->i[2 * j] = j + y->var_id; + jac->i[2 * j + 1] = j + x->var_id; + jac->p[j] = 2 * j; } } - node->jacobian->p[node->size] = 2 * node->size; + jac->p[node->size] = 2 * node->size; + node->jacobian = new_sparse_matrix(jac); } static void eval_jacobian_vector_args(expr *node) { expr *x = node->left; expr *y = node->right; - /* if x has lower variable idx than y */ if (x->var_id < y->var_id) { @@ -101,7 +102,7 @@ static void eval_jacobian_vector_args(expr *node) static void wsum_hess_init_vector_args(expr *node) { - node->wsum_hess = 
new_csr_matrix(node->n_vars, node->n_vars, 4 * node->size); + CSR_matrix *H = new_CSR_matrix(node->n_vars, node->n_vars, 4 * node->size); expr *x = node->left; expr *y = node->right; @@ -121,9 +122,9 @@ static void wsum_hess_init_vector_args(expr *node) /* var1 rows of Hessian */ for (i = 0; i < node->size; i++) { - node->wsum_hess->p[var1_id + i] = 2 * i; - node->wsum_hess->i[2 * i] = var1_id + i; - node->wsum_hess->i[2 * i + 1] = var2_id + i; + H->p[var1_id + i] = 2 * i; + H->i[2 * i] = var1_id + i; + H->i[2 * i + 1] = var2_id + i; } int nnz = 2 * node->size; @@ -131,21 +132,22 @@ static void wsum_hess_init_vector_args(expr *node) /* rows between var1 and var2 */ for (i = var1_id + node->size; i < var2_id; i++) { - node->wsum_hess->p[i] = nnz; + H->p[i] = nnz; } /* var2 rows of Hessian */ for (i = 0; i < node->size; i++) { - node->wsum_hess->p[var2_id + i] = nnz + 2 * i; + H->p[var2_id + i] = nnz + 2 * i; } - memcpy(node->wsum_hess->i + nnz, node->wsum_hess->i, nnz * sizeof(int)); + memcpy(H->i + nnz, H->i, nnz * sizeof(int)); /* remaining rows */ for (i = var2_id + node->size; i <= node->n_vars; i++) { - node->wsum_hess->p[i] = 4 * node->size; + H->p[i] = 4 * node->size; } + node->wsum_hess = new_sparse_matrix(H); } static void eval_wsum_hess_vector_args(expr *node, const double *w) diff --git a/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c b/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c index 5f09cc9..a356289 100644 --- a/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c +++ b/src/atoms/bivariate_restricted_dom/rel_entr_scalar_vector.c @@ -16,6 +16,7 @@ * limitations under the License. 
*/ #include "atoms/bivariate_restricted_dom.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -50,35 +51,35 @@ static void jacobian_init_scalar_vector(expr *node) assert(x->var_id != NOT_A_VARIABLE && y->var_id != NOT_A_VARIABLE); assert(x->var_id != y->var_id); - node->jacobian = new_csr_matrix(node->size, node->n_vars, 2 * node->size); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, 2 * node->size); if (x->var_id < y->var_id) { for (int j = 0; j < node->size; j++) { - node->jacobian->i[2 * j] = x->var_id; - node->jacobian->i[2 * j + 1] = y->var_id + j; - node->jacobian->p[j] = 2 * j; + jac->i[2 * j] = x->var_id; + jac->i[2 * j + 1] = y->var_id + j; + jac->p[j] = 2 * j; } } else { for (int j = 0; j < node->size; j++) { - node->jacobian->i[2 * j] = y->var_id + j; - node->jacobian->i[2 * j + 1] = x->var_id; - node->jacobian->p[j] = 2 * j; + jac->i[2 * j] = y->var_id + j; + jac->i[2 * j + 1] = x->var_id; + jac->p[j] = 2 * j; } } - node->jacobian->p[node->size] = 2 * node->size; + jac->p[node->size] = 2 * node->size; + node->jacobian = new_sparse_matrix(jac); } static void eval_jacobian_scalar_vector(expr *node) { expr *x = node->left; expr *y = node->right; - if (x->var_id < y->var_id) { for (int i = 0; i < node->size; i++) @@ -104,8 +105,8 @@ static void wsum_hess_init_scalar_vector(expr *node) int var_id_x = x->var_id; int var_id_y = y->var_id; - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 3 * node->size + 1); - CSR_Matrix *H = node->wsum_hess; + CSR_matrix *H = + new_CSR_matrix(node->n_vars, node->n_vars, 3 * node->size + 1); if (var_id_x < var_id_y) { @@ -161,6 +162,7 @@ static void wsum_hess_init_scalar_vector(expr *node) H->p[i] = 3 * node->size + 1; } } + node->wsum_hess = new_sparse_matrix(H); } static void eval_wsum_hess_scalar_vector(expr *node, const double *w) diff --git a/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c 
b/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c index f425357..f2b7928 100644 --- a/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c +++ b/src/atoms/bivariate_restricted_dom/rel_entr_vector_scalar.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "atoms/bivariate_restricted_dom.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -50,35 +51,35 @@ static void jacobian_init_vector_scalar(expr *node) assert(x->var_id != NOT_A_VARIABLE && y->var_id != NOT_A_VARIABLE); assert(x->var_id != y->var_id); - node->jacobian = new_csr_matrix(node->size, node->n_vars, 2 * node->size); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, 2 * node->size); if (x->var_id < y->var_id) { for (int j = 0; j < node->size; j++) { - node->jacobian->i[2 * j] = x->var_id + j; - node->jacobian->i[2 * j + 1] = y->var_id; - node->jacobian->p[j] = 2 * j; + jac->i[2 * j] = x->var_id + j; + jac->i[2 * j + 1] = y->var_id; + jac->p[j] = 2 * j; } } else { for (int j = 0; j < node->size; j++) { - node->jacobian->i[2 * j] = y->var_id; - node->jacobian->i[2 * j + 1] = x->var_id + j; - node->jacobian->p[j] = 2 * j; + jac->i[2 * j] = y->var_id; + jac->i[2 * j + 1] = x->var_id + j; + jac->p[j] = 2 * j; } } - node->jacobian->p[node->size] = 2 * node->size; + jac->p[node->size] = 2 * node->size; + node->jacobian = new_sparse_matrix(jac); } static void eval_jacobian_vector_scalar(expr *node) { expr *x = node->left; expr *y = node->right; - if (x->var_id < y->var_id) { for (int i = 0; i < node->size; i++) @@ -104,8 +105,8 @@ static void wsum_hess_init_vector_scalar(expr *node) int var_id_x = x->var_id; int var_id_y = y->var_id; - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 3 * node->size + 1); - CSR_Matrix *H = node->wsum_hess; + CSR_matrix *H = + new_CSR_matrix(node->n_vars, node->n_vars, 3 * node->size + 1); if (var_id_x < var_id_y) { @@ -161,6 +162,7 @@ static void wsum_hess_init_vector_scalar(expr *node) 
H->p[i] = 3 * node->size + 1; } } + node->wsum_hess = new_sparse_matrix(H); } static void eval_wsum_hess_vector_scalar(expr *node, const double *w) diff --git a/src/atoms/elementwise_full_dom/common.c b/src/atoms/elementwise_full_dom/common.c index b9735a4..352f784 100644 --- a/src/atoms/elementwise_full_dom/common.c +++ b/src/atoms/elementwise_full_dom/common.c @@ -17,9 +17,10 @@ */ #include "atoms/elementwise_full_dom.h" #include "subexpr.h" -#include "utils/CSC_Matrix.h" -#include "utils/CSR_Matrix.h" -#include "utils/CSR_sum.h" +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" +#include "utils/matrix_sum.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -32,20 +33,20 @@ void jacobian_init_elementwise(expr *node) /* if the variable is a child */ if (child->var_id != NOT_A_VARIABLE) { - node->jacobian = new_csr_matrix(node->size, node->n_vars, node->size); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, node->size); for (int j = 0; j < node->size; j++) { - node->jacobian->p[j] = j; - node->jacobian->i[j] = j + child->var_id; + jac->p[j] = j; + jac->i[j] = j + child->var_id; } - node->jacobian->p[node->size] = node->size; + jac->p[node->size] = node->size; + node->jacobian = new_sparse_matrix(jac); } else { /* jacobian of h(x) = f(g(x)) is Jf @ Jg, and here Jf is diagonal */ jacobian_init(child); - CSR_Matrix *Jg = child->jacobian; - node->jacobian = new_csr_copy_sparsity(Jg); + node->jacobian = child->jacobian->copy_sparsity(child->jacobian); node->work->dwork = (double *) SP_MALLOC(node->size * sizeof(double)); node->work->local_jac_diag = (double *) SP_MALLOC(node->size * sizeof(double)); @@ -64,11 +65,11 @@ void eval_jacobian_elementwise(expr *node) { /* jacobian of h(x) = f(g(x)) is Jf @ Jg, and here Jf is diagonal */ child->eval_jacobian(child); - CSR_Matrix *Jg = child->jacobian; node->local_jacobian(node, node->work->local_jac_diag); memcpy(node->work->dwork, node->work->local_jac_diag, 
node->size * sizeof(double)); - DA_fill_values(node->work->dwork, Jg, node->jacobian); + child->jacobian->DA_fill_values(node->work->dwork, child->jacobian, + node->jacobian); } } @@ -81,18 +82,19 @@ void wsum_hess_init_elementwise(expr *node) /* if the variable is a child */ if (id != NOT_A_VARIABLE) { - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, node->size); + CSR_matrix *hess = new_CSR_matrix(node->n_vars, node->n_vars, node->size); for (i = 0; i < node->size; i++) { - node->wsum_hess->p[id + i] = i; - node->wsum_hess->i[i] = id + i; + hess->p[id + i] = i; + hess->i[i] = id + i; } for (i = id + node->size; i <= node->n_vars; i++) { - node->wsum_hess->p[i] = node->size; + hess->p[i] = node->size; } + node->wsum_hess = new_sparse_matrix(hess); } else { @@ -101,28 +103,27 @@ void wsum_hess_init_elementwise(expr *node) term2 = sum_i (J_f^T w)_i^T Hg_i. For elementwise functions, D is diagonal. */ - jacobian_csc_init(child); - CSC_Matrix *Jg = child->work->jacobian_csc; - if (child->is_affine(child)) { - node->wsum_hess = ATA_alloc(Jg); + node->wsum_hess = child->jacobian->ATA_alloc(child->jacobian); } else { /* term1: Jg^T @ D @ Jg */ - node->work->hess_term1 = ATA_alloc(Jg); + node->work->hess_term1 = child->jacobian->ATA_alloc(child->jacobian); - /* term2: child's Hessian */ + /* term2: child's Hessian (mirror its sparsity polymorphically) */ wsum_hess_init(child); - CSR_Matrix *Hg = child->wsum_hess; - node->work->hess_term2 = new_csr_copy_sparsity(Hg); + node->work->hess_term2 = + child->wsum_hess->copy_sparsity(child->wsum_hess); /* wsum_hess = term1 + term2 */ - int max_nnz = node->work->hess_term1->nnz + node->work->hess_term2->nnz; - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, max_nnz); - sum_csr_alloc(node->work->hess_term1, node->work->hess_term2, - node->wsum_hess); + int max_nnz = + node->work->hess_term1->nnz + node->work->hess_term2->nnz; + node->wsum_hess = + new_sparse_matrix_alloc(node->n_vars, node->n_vars, 
max_nnz); + sum_matrices_alloc(node->work->hess_term1, node->work->hess_term2, + node->wsum_hess); } } } @@ -139,27 +140,27 @@ void eval_wsum_hess_elementwise(expr *node, const double *w) { if (child->is_affine(child)) { + /* Refresh the child Jacobian's CSC_matrix mirror once; subsequent calls + skip since the affine child's values don't change. */ if (!child->work->jacobian_csc_filled) { - csr_to_csc_fill_values(child->jacobian, child->work->jacobian_csc, - child->work->csc_work); + child->jacobian->refresh_csc_values(child->jacobian); child->work->jacobian_csc_filled = true; } node->local_wsum_hess(node, node->work->dwork, w); - ATDA_fill_values(child->work->jacobian_csc, node->work->dwork, - node->wsum_hess); + child->jacobian->ATDA_fill_values(child->jacobian, node->work->dwork, + node->wsum_hess); } else { - /* refresh CSC jacobian values */ - csr_to_csc_fill_values(child->jacobian, child->work->jacobian_csc, - child->work->csc_work); + /* Non-affine child: values change every iteration, must refresh. 
*/ + child->jacobian->refresh_csc_values(child->jacobian); /* term1: Jg^T @ D @ Jg */ node->local_wsum_hess(node, node->work->dwork, w); - ATDA_fill_values(child->work->jacobian_csc, node->work->dwork, - node->work->hess_term1); + child->jacobian->ATDA_fill_values(child->jacobian, node->work->dwork, + node->work->hess_term1); /* term2: child Hessian with weight Jf^T w */ memcpy(node->work->dwork, node->work->local_jac_diag, @@ -174,8 +175,8 @@ void eval_wsum_hess_elementwise(expr *node, const double *w) child->wsum_hess->nnz * sizeof(double)); /* wsum_hess = term1 + term2 */ - sum_csr_fill_values(node->work->hess_term1, node->work->hess_term2, - node->wsum_hess); + sum_matrices_fill_values(node->work->hess_term1, + node->work->hess_term2, node->wsum_hess); } } } diff --git a/src/atoms/elementwise_restricted_dom/atanh.c b/src/atoms/elementwise_restricted_dom/atanh.c index 79a08f2..c2c71c2 100644 --- a/src/atoms/elementwise_restricted_dom/atanh.c +++ b/src/atoms/elementwise_restricted_dom/atanh.c @@ -30,19 +30,21 @@ static void atanh_forward(expr *node, const double *u) static void atanh_eval_jacobian(expr *node) { double *x = node->left->value; + double *jx = node->jacobian->x; for (int j = 0; j < node->size; j++) { - node->jacobian->x[j] = 1.0 / (1.0 - x[j] * x[j]); + jx[j] = 1.0 / (1.0 - x[j] * x[j]); } } static void atanh_eval_wsum_hess(expr *node, const double *w) { double *x = node->left->value; + double *hx = node->wsum_hess->x; for (int j = 0; j < node->size; j++) { double c = 1.0 - x[j] * x[j]; - node->wsum_hess->x[j] = w[j] * (2.0 * x[j]) / (c * c); + hx[j] = w[j] * (2.0 * x[j]) / (c * c); } } diff --git a/src/atoms/elementwise_restricted_dom/common.c b/src/atoms/elementwise_restricted_dom/common.c index 9a2f561..5c88733 100644 --- a/src/atoms/elementwise_restricted_dom/common.c +++ b/src/atoms/elementwise_restricted_dom/common.c @@ -16,6 +16,7 @@ * limitations under the License. 
*/ #include "atoms/elementwise_restricted_dom.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include @@ -23,13 +24,14 @@ void jacobian_init_restricted(expr *node) { expr *child = node->left; - node->jacobian = new_csr_matrix(node->size, node->n_vars, node->size); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, node->size); for (int j = 0; j < node->size; j++) { - node->jacobian->p[j] = j; - node->jacobian->i[j] = j + child->var_id; + jac->p[j] = j; + jac->i[j] = j + child->var_id; } - node->jacobian->p[node->size] = node->size; + jac->p[node->size] = node->size; + node->jacobian = new_sparse_matrix(jac); } void wsum_hess_init_restricted(expr *node) @@ -38,18 +40,19 @@ void wsum_hess_init_restricted(expr *node) int id = child->var_id; int i; - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, node->size); + CSR_matrix *hess = new_CSR_matrix(node->n_vars, node->n_vars, node->size); for (i = 0; i < node->size; i++) { - node->wsum_hess->p[id + i] = i; - node->wsum_hess->i[i] = id + i; + hess->p[id + i] = i; + hess->i[i] = id + i; } for (i = id + node->size; i <= node->n_vars; i++) { - node->wsum_hess->p[i] = node->size; + hess->p[i] = node->size; } + node->wsum_hess = new_sparse_matrix(hess); } bool is_affine_restricted(const expr *node) diff --git a/src/atoms/elementwise_restricted_dom/entr.c b/src/atoms/elementwise_restricted_dom/entr.c index 53ac275..c3d29cb 100644 --- a/src/atoms/elementwise_restricted_dom/entr.c +++ b/src/atoms/elementwise_restricted_dom/entr.c @@ -32,18 +32,20 @@ static void entr_forward(expr *node, const double *u) static void entr_eval_jacobian(expr *node) { double *x = node->left->value; + double *jx = node->jacobian->x; for (int j = 0; j < node->size; j++) { - node->jacobian->x[j] = -log(x[j]) - 1.0; + jx[j] = -log(x[j]) - 1.0; } } static void entr_eval_wsum_hess(expr *node, const double *w) { double *x = node->left->value; + double *hx = node->wsum_hess->x; for (int j = 0; j < node->size; j++) 
{ - node->wsum_hess->x[j] = -w[j] / x[j]; + hx[j] = -w[j] / x[j]; } } diff --git a/src/atoms/elementwise_restricted_dom/log.c b/src/atoms/elementwise_restricted_dom/log.c index ce96425..d9753e1 100644 --- a/src/atoms/elementwise_restricted_dom/log.c +++ b/src/atoms/elementwise_restricted_dom/log.c @@ -31,18 +31,20 @@ static void log_forward(expr *node, const double *u) static void log_eval_jacobian(expr *node) { double *x = node->left->value; + double *jx = node->jacobian->x; for (int j = 0; j < node->size; j++) { - node->jacobian->x[j] = 1.0 / x[j]; + jx[j] = 1.0 / x[j]; } } static void log_eval_wsum_hess(expr *node, const double *w) { double *x = node->left->value; + double *hx = node->wsum_hess->x; for (int j = 0; j < node->size; j++) { - node->wsum_hess->x[j] = -w[j] / (x[j] * x[j]); + hx[j] = -w[j] / (x[j] * x[j]); } } diff --git a/src/atoms/elementwise_restricted_dom/tan.c b/src/atoms/elementwise_restricted_dom/tan.c index ccf945e..3a4e4dc 100644 --- a/src/atoms/elementwise_restricted_dom/tan.c +++ b/src/atoms/elementwise_restricted_dom/tan.c @@ -30,20 +30,22 @@ static void tan_forward(expr *node, const double *u) static void tan_eval_jacobian(expr *node) { double *x = node->left->value; + double *jx = node->jacobian->x; for (int j = 0; j < node->size; j++) { double c = cos(x[j]); - node->jacobian->x[j] = 1.0 / (c * c); + jx[j] = 1.0 / (c * c); } } static void tan_eval_wsum_hess(expr *node, const double *w) { double *x = node->left->value; + double *hx = node->wsum_hess->x; for (int j = 0; j < node->size; j++) { double c = cos(x[j]); - node->wsum_hess->x[j] = 2.0 * w[j] * node->value[j] / (c * c); + hx[j] = 2.0 * w[j] * node->value[j] / (c * c); } } diff --git a/src/atoms/other/prod.c b/src/atoms/other/prod.c index 083e4c1..aeb7226 100644 --- a/src/atoms/other/prod.c +++ b/src/atoms/other/prod.c @@ -16,6 +16,7 @@ * limitations under the License. 
*/ #include "atoms/non_elementwise_full_dom.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -71,13 +72,14 @@ static void jacobian_init_impl(expr *node) /* if x is a variable */ if (x->var_id != NOT_A_VARIABLE) { - node->jacobian = new_csr_matrix(1, node->n_vars, x->size); - node->jacobian->p[0] = 0; - node->jacobian->p[1] = x->size; + CSR_matrix *jac = new_CSR_matrix(1, node->n_vars, x->size); + jac->p[0] = 0; + jac->p[1] = x->size; for (int j = 0; j < x->size; j++) { - node->jacobian->i[j] = x->var_id + j; + jac->i[j] = x->var_id + j; } + node->jacobian = new_sparse_matrix(jac); } else { @@ -96,21 +98,22 @@ static void eval_jacobian(expr *node) /* if x is a variable */ if (x->var_id != NOT_A_VARIABLE) { + double *jx = node->jacobian->x; if (num_of_zeros == 0) { for (int j = 0; j < x->size; j++) { - node->jacobian->x[j] = node->value[0] / x->value[j]; + jx[j] = node->value[0] / x->value[j]; } } else if (num_of_zeros == 1) { - memset(node->jacobian->x, 0, sizeof(double) * x->size); - node->jacobian->x[pnode->zero_index] = pnode->prod_nonzero; + memset(jx, 0, sizeof(double) * x->size); + jx[pnode->zero_index] = pnode->prod_nonzero; } else { - memset(node->jacobian->x, 0, sizeof(double) * x->size); + memset(jx, 0, sizeof(double) * x->size); } } else @@ -128,21 +131,21 @@ static void wsum_hess_init_impl(expr *node) /* if x is a variable */ if (x->var_id != NOT_A_VARIABLE) { - /* allocate n_vars x n_vars CSR matrix with dense block */ + /* allocate n_vars x n_vars CSR_matrix with dense block */ int block_size = x->size; int nnz = block_size * block_size; - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz); + CSR_matrix *hess = new_CSR_matrix(node->n_vars, node->n_vars, nnz); /* fill row pointers for the dense block */ for (int i = 0; i < block_size; i++) { - node->wsum_hess->p[x->var_id + i] = i * block_size; + hess->p[x->var_id + i] = i * block_size; } /* fill row pointers for rows after the block */ for 
(int i = x->var_id + block_size; i <= node->n_vars; i++) { - node->wsum_hess->p[i] = nnz; + hess->p[i] = nnz; } /* fill column indices for the dense block */ @@ -150,9 +153,10 @@ static void wsum_hess_init_impl(expr *node) { for (int j = 0; j < block_size; j++) { - node->wsum_hess->i[i * block_size + j] = x->var_id + j; + hess->i[i * block_size + j] = x->var_id + j; } } + node->wsum_hess = new_sparse_matrix(hess); } else { diff --git a/src/atoms/other/prod_axis_one.c b/src/atoms/other/prod_axis_one.c index ac2583e..2e0e971 100644 --- a/src/atoms/other/prod_axis_one.c +++ b/src/atoms/other/prod_axis_one.c @@ -16,6 +16,7 @@ * limitations under the License. */ #include "atoms/non_elementwise_full_dom.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -81,14 +82,14 @@ static void jacobian_init_impl(expr *node) /* if x is a variable */ if (x->var_id != NOT_A_VARIABLE) { - node->jacobian = new_csr_matrix(node->size, node->n_vars, x->size); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, x->size); /* set row pointers (each row has d2 nnzs) */ for (int row = 0; row < x->d1; row++) { - node->jacobian->p[row] = row * x->d2; + jac->p[row] = row * x->d2; } - node->jacobian->p[x->d1] = x->size; + jac->p[x->d1] = x->size; /* set column indices */ for (int row = 0; row < x->d1; row++) @@ -96,9 +97,10 @@ static void jacobian_init_impl(expr *node) int start = row * x->d2; for (int col = 0; col < x->d2; col++) { - node->jacobian->i[start + col] = x->var_id + col * x->d1 + row; + jac->i[start + col] = x->var_id + col * x->d1 + row; } } + node->jacobian = new_sparse_matrix(jac); } else { @@ -161,8 +163,7 @@ static void wsum_hess_init_impl(expr *node) /* each row i has d2-1 non-zero entries, with column indices corresponding to the columns in that row (except the diagonal element). 
*/ int nnz = x->d1 * x->d2 * (x->d2 - 1); - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz); - CSR_Matrix *H = node->wsum_hess; + CSR_matrix *H = new_CSR_matrix(node->n_vars, node->n_vars, nnz); /* fill sparsity pattern */ int nnz_per_row = x->d2 - 1; @@ -192,6 +193,7 @@ static void wsum_hess_init_impl(expr *node) { H->p[i] = nnz; } + node->wsum_hess = new_sparse_matrix(H); } else { @@ -205,7 +207,7 @@ static inline void wsum_hess_row_no_zeros(expr *node, const double *w, int row, int d2) { expr *x = node->left; - CSR_Matrix *H = node->wsum_hess; + CSR_matrix *H = node->wsum_hess->to_csr(node->wsum_hess); double scale = w[row] * node->value[row]; /* for each variable xk in this row, fill in Hessian entries @@ -232,7 +234,7 @@ static inline void wsum_hess_row_one_zero(expr *node, const double *w, int row, { expr *x = node->left; prod_axis *pnode = (prod_axis *) node; - CSR_Matrix *H = node->wsum_hess; + CSR_matrix *H = node->wsum_hess->to_csr(node->wsum_hess); double *H_vals = H->x; int p = pnode->zero_index[row]; /* zero column index */ double w_prod = w[row] * pnode->prod_nonzero[row]; @@ -277,7 +279,7 @@ static inline void wsum_hess_row_two_zeros(expr *node, const double *w, int row, { expr *x = node->left; prod_axis *pnode = (prod_axis *) node; - CSR_Matrix *H = node->wsum_hess; + CSR_matrix *H = node->wsum_hess->to_csr(node->wsum_hess); double *H_vals = H->x; /* find indices p and q where row has zeros */ @@ -330,7 +332,7 @@ static inline void wsum_hess_row_two_zeros(expr *node, const double *w, int row, static inline void wsum_hess_row_many_zeros(expr *node, int row, int d2) { - CSR_Matrix *H = node->wsum_hess; + CSR_matrix *H = node->wsum_hess->to_csr(node->wsum_hess); double *H_vals = H->x; expr *x = node->left; diff --git a/src/atoms/other/prod_axis_zero.c b/src/atoms/other/prod_axis_zero.c index 20fac1b..55bedd2 100644 --- a/src/atoms/other/prod_axis_zero.c +++ b/src/atoms/other/prod_axis_zero.c @@ -16,6 +16,7 @@ * limitations under the 
License. */ #include "atoms/non_elementwise_full_dom.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -76,14 +77,14 @@ static void jacobian_init_impl(expr *node) /* if x is a variable */ if (x->var_id != NOT_A_VARIABLE) { - node->jacobian = new_csr_matrix(node->size, node->n_vars, x->size); + CSR_matrix *jac = new_CSR_matrix(node->size, node->n_vars, x->size); /* set row pointers (each row has d1 nnzs) */ for (int row = 0; row < x->d2; row++) { - node->jacobian->p[row] = row * x->d1; + jac->p[row] = row * x->d1; } - node->jacobian->p[x->d2] = x->size; + jac->p[x->d2] = x->size; /* set column indices */ for (int col = 0; col < x->d2; col++) @@ -91,9 +92,10 @@ static void jacobian_init_impl(expr *node) int start = col * x->d1; for (int i = 0; i < x->d1; i++) { - node->jacobian->i[start + i] = x->var_id + start + i; + jac->i[start + i] = x->var_id + start + i; } } + node->jacobian = new_sparse_matrix(jac); } else { @@ -154,8 +156,7 @@ static void wsum_hess_init_impl(expr *node) { /* Hessian has block diagonal structure: d2 blocks of size d1 x d1 */ int nnz = x->d2 * x->d1 * x->d1; - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, nnz); - CSR_Matrix *H = node->wsum_hess; + CSR_matrix *H = new_CSR_matrix(node->n_vars, node->n_vars, nnz); /* fill row pointers for the variable's rows (block diagonal) */ for (int i = 0; i < x->size; i++) @@ -184,6 +185,7 @@ static void wsum_hess_init_impl(expr *node) } } } + node->wsum_hess = new_sparse_matrix(H); } else { diff --git a/src/atoms/other/quad_form.c b/src/atoms/other/quad_form.c index 72474df..d49c16b 100644 --- a/src/atoms/other/quad_form.c +++ b/src/atoms/other/quad_form.c @@ -17,9 +17,10 @@ */ #include "atoms/non_elementwise_full_dom.h" #include "subexpr.h" -#include "utils/CSC_Matrix.h" -#include "utils/CSR_sum.h" +#include "utils/CSC_matrix.h" +#include "utils/matrix_sum.h" #include "utils/cblas_wrapper.h" +#include "utils/sparse_matrix.h" #include 
"utils/tracked_alloc.h" #include #include @@ -35,7 +36,7 @@ static void forward(expr *node, const double *u) x->forward(x, u); /* local forward pass */ - CSR_Matrix *Q = ((quad_form_expr *) node)->Q; + CSR_matrix *Q = ((quad_form_expr *) node)->Q; Ax_csr(Q, x->value, node->work->dwork, 0); node->value[0] = 0.0; @@ -51,27 +52,28 @@ static void jacobian_init_impl(expr *node) if (x->var_id != NOT_A_VARIABLE) { - node->jacobian = new_csr_matrix(1, node->n_vars, x->size); - node->jacobian->p[0] = 0; - node->jacobian->p[1] = x->size; + CSR_matrix *jac = new_CSR_matrix(1, node->n_vars, x->size); + jac->p[0] = 0; + jac->p[1] = x->size; for (int j = 0; j < x->size; j++) { - node->jacobian->i[j] = x->var_id + j; + jac->i[j] = x->var_id + j; } + node->jacobian = new_sparse_matrix(jac); } else { /* chain rule: J = 2 * (Q @ f(x))^T * J_f */ jacobian_init(x); jacobian_csc_init(x); - CSC_Matrix *J_csc = x->work->jacobian_csc; + CSC_matrix *J_csc = x->work->jacobian_csc; /* allocate the right number of nnz */ int nnz = count_nonzero_cols_csc(J_csc); - node->jacobian = new_csr_matrix(1, node->n_vars, nnz); - node->jacobian->p[0] = 0; - node->jacobian->p[1] = nnz; + CSR_matrix *jac = new_CSR_matrix(1, node->n_vars, nnz); + jac->p[0] = 0; + jac->p[1] = nnz; /* fill sparsity pattern */ int idx = 0; @@ -79,22 +81,24 @@ static void jacobian_init_impl(expr *node) { if (J_csc->p[j + 1] > J_csc->p[j]) { - node->jacobian->i[idx++] = j; + jac->i[idx++] = j; } } + node->jacobian = new_sparse_matrix(jac); } } static void eval_jacobian(expr *node) { expr *x = node->left; - CSR_Matrix *Q = ((quad_form_expr *) node)->Q; + CSR_matrix *Q = ((quad_form_expr *) node)->Q; + CSR_matrix *jac = node->jacobian->to_csr(node->jacobian); if (x->var_id != NOT_A_VARIABLE) { /* jacobian = 2 * (Q @ x)^T */ - Ax_csr(Q, x->value, node->jacobian->x, 0); - cblas_dscal(x->size, 2.0, node->jacobian->x, 1); + Ax_csr(Q, x->value, jac->x, 0); + cblas_dscal(x->size, 2.0, jac->x, 1); } else { @@ -103,7 +107,7 @@ static 
void eval_jacobian(expr *node) if (!x->work->jacobian_csc_filled) { - csr_to_csc_fill_values(x->jacobian, x->work->jacobian_csc, + csr_to_csc_fill_values(x->jacobian->to_csr(x->jacobian), x->work->jacobian_csc, x->work->csc_work); if (x->is_affine(x)) @@ -114,20 +118,20 @@ static void eval_jacobian(expr *node) /* The jacobian has same values as the gradient, which is J_f^T (Q @ f(x)). Here, dwork stores Q @ f(x) from forward */ - yTA_fill_values(x->work->jacobian_csc, node->work->dwork, node->jacobian); + yTA_fill_values(x->work->jacobian_csc, node->work->dwork, jac); - cblas_dscal(node->jacobian->nnz, 2.0, node->jacobian->x, 1); + cblas_dscal(jac->nnz, 2.0, jac->x, 1); } } static void wsum_hess_init_impl(expr *node) { - CSR_Matrix *Q = ((quad_form_expr *) node)->Q; + CSR_matrix *Q = ((quad_form_expr *) node)->Q; expr *x = node->left; if (x->var_id != NOT_A_VARIABLE) { - CSR_Matrix *H = new_csr_matrix(node->n_vars, node->n_vars, Q->nnz); + CSR_matrix *H = new_CSR_matrix(node->n_vars, node->n_vars, Q->nnz); /* set global row pointers */ memcpy(H->p + x->var_id, Q->p, (x->size + 1) * sizeof(int)); @@ -142,7 +146,7 @@ static void wsum_hess_init_impl(expr *node) H->i[i] = Q->i[i] + x->var_id; } - node->wsum_hess = H; + node->wsum_hess = new_sparse_matrix(H); } else { @@ -157,28 +161,30 @@ static void wsum_hess_init_impl(expr *node) /* jacobian_csc_init(x) already called in jacobian_init */ quad_form_expr *qnode = (quad_form_expr *) node; - CSC_Matrix *Jf = x->work->jacobian_csc; + CSC_matrix *Jf = x->work->jacobian_csc; /* term1 = Jf^T W Jf = Jf^T B*/ - CSC_Matrix *B = symBA_alloc(Q, Jf); + CSC_matrix *B = symBA_alloc(Q, Jf); qnode->QJf = B; - node->work->hess_term1 = BTA_alloc(Jf, B); + node->work->hess_term1 = new_sparse_matrix(BTA_alloc(Jf, B)); /* term2 = sum_i (Qf(x))_i nabla^2 f_i */ wsum_hess_init(x); - node->work->hess_term2 = new_csr_copy_sparsity(x->wsum_hess); + node->work->hess_term2 = x->wsum_hess->copy_sparsity(x->wsum_hess); /* hess = term1 + term2 */ - 
int max_nnz = node->work->hess_term1->nnz + node->work->hess_term2->nnz; - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, max_nnz); - sum_csr_alloc(node->work->hess_term1, node->work->hess_term2, - node->wsum_hess); + int max_nnz = + node->work->hess_term1->nnz + node->work->hess_term2->nnz; + node->wsum_hess = + new_sparse_matrix_alloc(node->n_vars, node->n_vars, max_nnz); + sum_matrices_alloc(node->work->hess_term1, node->work->hess_term2, + node->wsum_hess); } } static void eval_wsum_hess(expr *node, const double *w) { - CSR_Matrix *Q = ((quad_form_expr *) node)->Q; + CSR_matrix *Q = ((quad_form_expr *) node)->Q; expr *x = node->left; double two_w = 2.0 * w[0]; @@ -191,11 +197,11 @@ static void eval_wsum_hess(expr *node, const double *w) } else { - /* fill the CSC representation of the Jacobian of the child */ - CSC_Matrix *Jf = x->work->jacobian_csc; + /* fill the CSC_matrix representation of the Jacobian of the child */ + CSC_matrix *Jf = x->work->jacobian_csc; if (!x->work->jacobian_csc_filled) { - csr_to_csc_fill_values(x->jacobian, Jf, x->work->csc_work); + csr_to_csc_fill_values(x->jacobian->to_csr(x->jacobian), Jf, x->work->csc_work); if (x->is_affine(x)) { @@ -203,9 +209,8 @@ static void eval_wsum_hess(expr *node, const double *w) } } - CSC_Matrix *QJf = ((quad_form_expr *) node)->QJf; - CSR_Matrix *term1 = node->work->hess_term1; - CSR_Matrix *term2 = node->work->hess_term2; + CSC_matrix *QJf = ((quad_form_expr *) node)->QJf; + CSR_matrix *term1 = node->work->hess_term1->to_csr(node->work->hess_term1); /* term1 = J_f^T Q J_f = J_f^T B */ BA_fill_values(Q, Jf, QJf); @@ -213,25 +218,27 @@ static void eval_wsum_hess(expr *node, const double *w) /* term2 */ x->eval_wsum_hess(x, node->work->dwork); - memcpy(term2->x, x->wsum_hess->x, x->wsum_hess->nnz * sizeof(double)); + memcpy(node->work->hess_term2->x, x->wsum_hess->x, + x->wsum_hess->nnz * sizeof(double)); /* scale both terms by 2w */ - cblas_dscal(term1->nnz, two_w, term1->x, 1); - 
cblas_dscal(term2->nnz, two_w, term2->x, 1); + cblas_dscal(node->work->hess_term1->nnz, two_w, node->work->hess_term1->x, 1); + cblas_dscal(node->work->hess_term2->nnz, two_w, node->work->hess_term2->x, 1); /* sum the two terms */ - sum_csr_fill_values(term1, term2, node->wsum_hess); + sum_matrices_fill_values(node->work->hess_term1, node->work->hess_term2, + node->wsum_hess); } } static void free_type_data(expr *node) { quad_form_expr *qnode = (quad_form_expr *) node; - free_csr_matrix(qnode->Q); + free_CSR_matrix(qnode->Q); qnode->Q = NULL; if (qnode->QJf != NULL) { - free_csc_matrix(qnode->QJf); + free_CSC_matrix(qnode->QJf); qnode->QJf = NULL; } } @@ -243,7 +250,7 @@ static bool is_affine(const expr *node) return false; } -expr *new_quad_form(expr *left, CSR_Matrix *Q) +expr *new_quad_form(expr *left, CSR_matrix *Q) { assert(left->d1 == 1 || left->d2 == 1); /* left must be a vector */ quad_form_expr *qnode = (quad_form_expr *) SP_CALLOC(1, sizeof(quad_form_expr)); @@ -255,8 +262,8 @@ expr *new_quad_form(expr *left, CSR_Matrix *Q) expr_retain(left); /* Set type-specific field */ - qnode->Q = new_csr_matrix(Q->m, Q->n, Q->nnz); - copy_csr_matrix(Q, qnode->Q); + qnode->Q = new_CSR_matrix(Q->m, Q->n, Q->nnz); + copy_CSR_matrix(Q, qnode->Q); /* dwork stores the result of Q @ f(x) in the forward pass */ node->work->dwork = (double *) SP_MALLOC(left->size * sizeof(double)); diff --git a/src/expr.c b/src/expr.c index 64f0410..b99b266 100644 --- a/src/expr.c +++ b/src/expr.c @@ -16,7 +16,7 @@ * limitations under the License. 
*/ #include "expr.h" -#include "utils/CSC_Matrix.h" +#include "utils/CSC_matrix.h" #include "utils/int_double_pair.h" #include "utils/tracked_alloc.h" #include @@ -52,7 +52,7 @@ void jacobian_csc_init(expr *node) } node->work->csc_work = (int *) SP_MALLOC(node->n_vars * sizeof(int)); node->work->jacobian_csc = - csr_to_csc_alloc(node->jacobian, node->work->csc_work); + csr_to_csc_alloc(node->jacobian->to_csr(node->jacobian), node->work->csc_work); } void free_expr(expr *node) @@ -76,19 +76,19 @@ void free_expr(expr *node) /* free value array and derivative matrices */ free(node->value); - free_csr_matrix(node->jacobian); - free_csr_matrix(node->wsum_hess); + free_matrix(node->jacobian); + free_matrix(node->wsum_hess); /* free workspace */ if (node->work) { free(node->work->dwork); free(node->work->iwork); - free_csc_matrix(node->work->jacobian_csc); + free_CSC_matrix(node->work->jacobian_csc); free(node->work->csc_work); free(node->work->local_jac_diag); - free_csr_matrix(node->work->hess_term1); - free_csr_matrix(node->work->hess_term2); + free_matrix(node->work->hess_term1); + free_matrix(node->work->hess_term2); free(node->work); } diff --git a/src/old-code/linear_op.c b/src/old-code/linear_op.c index a63bc4d..8637ee6 100644 --- a/src/old-code/linear_op.c +++ b/src/old-code/linear_op.c @@ -17,7 +17,8 @@ */ #include "old-code/old_affine.h" #include "subexpr.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" +#include "utils/sparse_matrix.h" #include "utils/tracked_alloc.h" #include #include @@ -32,7 +33,7 @@ static void forward(expr *node, const double *u) node->left->forward(node->left, u); /* y = A * x (A is stored as node->jacobian) */ - Ax_csr(node->jacobian, x->value, node->value, x->var_id); + Ax_csr(node->jacobian->to_csr(node->jacobian), x->value, node->value, x->var_id); /* y += b (if offset exists) */ if (lin_node->b != NULL) @@ -74,7 +75,8 @@ static void eval_jacobian(expr *node) static void wsum_hess_init_impl(expr *node) { /* Linear 
operator Hessian is always zero */ - node->wsum_hess = new_csr_matrix(node->n_vars, node->n_vars, 0); + node->wsum_hess = + new_sparse_matrix(new_CSR_matrix(node->n_vars, node->n_vars, 0)); } static void eval_wsum_hess(expr *node, const double *w) @@ -84,7 +86,7 @@ static void eval_wsum_hess(expr *node, const double *w) (void) w; } -expr *new_linear(expr *u, const CSR_Matrix *A, const double *b) +expr *new_linear(expr *u, const CSR_matrix *A, const double *b) { assert(u->d2 == 1); /* Allocate the type-specific struct */ @@ -97,8 +99,9 @@ expr *new_linear(expr *u, const CSR_Matrix *A, const double *b) expr_retain(u); /* Store A directly as the jacobian (linear op jacobian is constant) */ - node->jacobian = new_csr_matrix(A->m, A->n, A->nnz); - copy_csr_matrix(A, node->jacobian); + CSR_matrix *jac = new_CSR_matrix(A->m, A->n, A->nnz); + copy_CSR_matrix(A, jac); + node->jacobian = new_sparse_matrix(jac); /* Initialize offset (copy b if provided, otherwise NULL) */ if (b != NULL) diff --git a/src/old-code/old_CSR.c b/src/old-code/old_CSR.c index 09225ea..e4137ab 100644 --- a/src/old-code/old_CSR.c +++ b/src/old-code/old_CSR.c @@ -16,11 +16,11 @@ * limitations under the License. 
*/ #include "old-code/old_CSR.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" #include #include -CSR_Matrix *block_diag_repeat_csr(const CSR_Matrix *A, int p) +CSR_matrix *block_diag_repeat_csr(const CSR_matrix *A, int p) { assert(p > 0); @@ -28,7 +28,7 @@ CSR_Matrix *block_diag_repeat_csr(const CSR_Matrix *A, int p) int n = A->n; int nnz = A->nnz; - CSR_Matrix *A_kron = new_csr_matrix(m * p, n * p, nnz * p); + CSR_matrix *A_kron = new_CSR_matrix(m * p, n * p, nnz * p); int nnz_cursor = 0; for (int block = 0; block < p; block++) @@ -55,7 +55,7 @@ CSR_Matrix *block_diag_repeat_csr(const CSR_Matrix *A, int p) return A_kron; } -CSR_Matrix *kron_identity_csr(const CSR_Matrix *A, int p) +CSR_matrix *kron_identity_csr(const CSR_matrix *A, int p) { assert(p > 0); @@ -63,7 +63,7 @@ CSR_Matrix *kron_identity_csr(const CSR_Matrix *A, int p) int n = A->n; int nnz = A->nnz; - CSR_Matrix *A_kron = new_csr_matrix(m * p, n * p, nnz * p); + CSR_matrix *A_kron = new_CSR_matrix(m * p, n * p, nnz * p); int nnz_cursor = 0; for (int row_block = 0; row_block < m; row_block++) @@ -90,7 +90,7 @@ CSR_Matrix *kron_identity_csr(const CSR_Matrix *A, int p) return A_kron; } -void Ax_csr_fill_values(const CSR_Matrix *AT, const double *z, CSR_Matrix *C) +void Ax_csr_fill_values(const CSR_matrix *AT, const double *z, CSR_matrix *C) { int A_ncols = AT->m; @@ -116,7 +116,7 @@ void Ax_csr_fill_values(const CSR_Matrix *AT, const double *z, CSR_Matrix *C) } } -void csr_insert_value(CSR_Matrix *A, int col_idx, double value) +void csr_insert_value(CSR_matrix *A, int col_idx, double value) { assert(A->m == 1); @@ -144,7 +144,7 @@ void csr_insert_value(CSR_Matrix *A, int col_idx, double value) A->nnz++; } -void Ax_csr_wo_offset(const CSR_Matrix *A, const double *x, double *y) +void Ax_csr_wo_offset(const CSR_matrix *A, const double *x, double *y) { for (int row = 0; row < A->m; row++) { @@ -157,9 +157,9 @@ void Ax_csr_wo_offset(const CSR_Matrix *A, const double *x, double *y) } } -void 
diag_csr_mult(const double *d, const CSR_Matrix *A, CSR_Matrix *C) +void diag_csr_mult(const double *d, const CSR_matrix *A, CSR_matrix *C) { - copy_csr_matrix(A, C); + copy_CSR_matrix(A, C); for (int row = 0; row < C->m; row++) { diff --git a/src/old-code/old_CSR_sum.c b/src/old-code/old_CSR_sum.c index e1cea50..c6a659d 100644 --- a/src/old-code/old_CSR_sum.c +++ b/src/old-code/old_CSR_sum.c @@ -16,13 +16,13 @@ * limitations under the License. */ #include "old-code/old_CSR_sum.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" #include "utils/int_double_pair.h" #include #include #include -void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C) +void sum_csr_matrices(const CSR_matrix *A, const CSR_matrix *B, CSR_matrix *C) { /* A and B must be different from C */ assert(A != C && B != C); @@ -84,7 +84,7 @@ void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C) C->p[A->m] = C->nnz; } -void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C, +void sum_scaled_csr_matrices(const CSR_matrix *A, const CSR_matrix *B, CSR_matrix *C, const double *d1, const double *d2) { C->nnz = 0; @@ -150,7 +150,7 @@ void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matri C->p[A->m] = C->nnz; } -void sum_all_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, int_double_pair *pairs) +void sum_all_rows_csr(const CSR_matrix *A, CSR_matrix *C, int_double_pair *pairs) { assert(C->m == 1); C->n = A->n; @@ -185,7 +185,7 @@ void sum_all_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, int_double_pair *pairs C->p[1] = C->nnz; } -void sum_block_of_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, +void sum_block_of_rows_csr(const CSR_matrix *A, CSR_matrix *C, int_double_pair *pairs, int row_block_size) { assert(A->m % row_block_size == 0); @@ -238,7 +238,7 @@ void sum_block_of_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, } } -void sum_evenly_spaced_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, +void 
sum_evenly_spaced_rows_csr(const CSR_matrix *A, CSR_matrix *C, int_double_pair *pairs, int row_spacing) { assert(C->m == row_spacing); @@ -286,7 +286,7 @@ void sum_evenly_spaced_rows_csr(const CSR_Matrix *A, CSR_Matrix *C, } } -void sum_spaced_rows_into_row_csr(const CSR_Matrix *A, CSR_Matrix *C, +void sum_spaced_rows_into_row_csr(const CSR_matrix *A, CSR_matrix *C, int_double_pair *pairs, int offset, int spacing) { assert(C->m == 1); diff --git a/src/old-code/old_permuted_dense.c b/src/old-code/old_permuted_dense.c new file mode 100644 index 0000000..453bdef --- /dev/null +++ b/src/old-code/old_permuted_dense.c @@ -0,0 +1,286 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the SparseDiffEngine project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "old-code/old_permuted_dense.h" + +#include "utils/cblas_wrapper.h" +#include "utils/tracked_alloc.h" +#include +#include + +matrix *BTA_pd_csr_alloc(const permuted_dense *B, const CSR_matrix *A) +{ + /* Cij != 0 only if i is in B's column permutation and column j of A + overlaps with column i of B. */ + + /* Gather the union of columns appearing in A's rows at positions + row_perm_B. Use a bitmap of size A->n for O(nnz) collection. 
 */ + int p = A->n; + char *seen = (char *) SP_CALLOC(p, sizeof(char)); + int s_A = 0; + for (int kk = 0; kk < B->m0; kk++) + { + int row = B->row_perm[kk]; + for (int e = A->p[row]; e < A->p[row + 1]; e++) + { + int j = A->i[e]; + if (!seen[j]) + { + seen[j] = 1; + s_A++; + } + } + } + + int *col_active = (int *) SP_MALLOC((s_A > 0 ? s_A : 1) * sizeof(int)); + int idx = 0; + for (int j = 0; j < p; j++) + { + if (seen[j]) + { + col_active[idx++] = j; + } + } + + matrix *C = + new_permuted_dense(B->base.n, p, B->n0, s_A, B->col_perm, col_active, NULL); + free(col_active); + free(seen); + + /* Upgrade `dwork` (currently sized for the Y-role at m0_C * n0_C = B->n0 * + s_A) to fit the gather buffer A_sub_dense used by BTA_pd_csr / + BTDA_pd_csr_fill_values: shape (B->m0, s_A) row-major. The dgemm + reads it as (B->m0, s_A), so size B->m0 * s_A doubles suffices. */ + permuted_dense *C_pd = (permuted_dense *) C; + size_t gather_size = B->m0 * s_A; + if (gather_size > C_pd->dwork_size) + { + free(C_pd->dwork); + C_pd->dwork_size = gather_size; + C_pd->dwork = (double *) SP_CALLOC(gather_size, sizeof(double)); + } + return C; +} + +void BTA_pd_csr_fill_values(const permuted_dense *B, const CSR_matrix *A_csr, + permuted_dense *C) +{ + int m0 = B->m0; + int dn_B = B->n0; + int s_A = C->n0; + + if (s_A == 0 || m0 == 0) + { + /* Output dense block is empty; nothing to fill. */ + return; + } + + /* Use C->col_inv (pre-built by new_permuted_dense) as col_inv_out and + C->dwork as A_sub_dense; both are owned by C. dwork is sized at alloc + time to cover m0 * s_A; only that prefix is touched. 
*/ + double *A_sub_dense = C->dwork; + size_t used = m0 * s_A; + memset(A_sub_dense, 0, used * sizeof(double)); + + for (int kk = 0; kk < m0; kk++) + { + int row = B->row_perm[kk]; + for (int e = A_csr->p[row]; e < A_csr->p[row + 1]; e++) + { + int j = A_csr->i[e]; + int jj = C->col_inv[j]; + /* jj should always be valid (we built col_perm from these entries), + but guard against asymmetry between alloc and fill calls. */ + if (jj >= 0) + { + A_sub_dense[kk * s_A + jj] = A_csr->x[e]; + } + } + } + + /* C->X = X_B^T @ A_sub_dense */ + cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, dn_B, s_A, m0, 1.0, B->X, + dn_B, A_sub_dense, s_A, 0.0, C->X, s_A); +} + +/* BTDA variant of BTA_csr_pd: C->X = X_B^T diag(d) A_sub_dense. Folds d + into the scatter step. */ +void BTDA_pd_csr_fill_values(const permuted_dense *B, const double *d, + const CSR_matrix *A_csr, permuted_dense *C) +{ + int m0 = B->m0; + int dn_B = B->n0; + int s_A = C->n0; + + if (s_A == 0 || m0 == 0) + { + return; + } + + double *A_sub_dense = C->dwork; + size_t used = m0 * s_A; + memset(A_sub_dense, 0, used * sizeof(double)); + + for (int kk = 0; kk < m0; kk++) + { + int row = B->row_perm[kk]; + double dk = d ? d[row] : 1.0; + for (int e = A_csr->p[row]; e < A_csr->p[row + 1]; e++) + { + int j = A_csr->i[e]; + int jj = C->col_inv[j]; + if (jj >= 0) + { + A_sub_dense[kk * s_A + jj] = dk * A_csr->x[e]; + } + } + } + + cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, dn_B, s_A, m0, 1.0, B->X, + dn_B, A_sub_dense, s_A, 0.0, C->X, s_A); +} + +/* Legacy CSR-pd kernels (B=CSR, A=PD), formerly in src/utils/permuted_dense.c. + Production now goes through BTA_csc_pd_alloc / BTDA_csc_pd_fill_values; + these are kept here for reference + direct unit tests. */ + +matrix *BTA_csr_pd_alloc(const CSR_matrix *B_csr, const permuted_dense *A) +{ + /* Gather the union of columns appearing in B's rows at positions + row_perm_A. Bitmap of size B_csr->n for O(nnz) collection. 
*/ + int q = B_csr->n; + char *seen = (char *) SP_CALLOC(q, sizeof(char)); + int r_B = 0; + for (int kk = 0; kk < A->m0; kk++) + { + int row = A->row_perm[kk]; + for (int e = B_csr->p[row]; e < B_csr->p[row + 1]; e++) + { + int i = B_csr->i[e]; + if (!seen[i]) + { + seen[i] = 1; + r_B++; + } + } + } + + int *row_active = (int *) SP_MALLOC((r_B > 0 ? r_B : 1) * sizeof(int)); + int idx = 0; + for (int i = 0; i < q; i++) + { + if (seen[i]) + { + row_active[idx++] = i; + } + } + + matrix *C = + new_permuted_dense(q, A->base.n, r_B, A->n0, row_active, A->col_perm, NULL); + free(row_active); + free(seen); + + /* Upgrade `dwork` (currently sized for the Y-role at m0_C * n0_C = r_B * + A->n0) to fit the gather buffer B_sub_dense used by BTA_csr_pd / + BTDA_csr_pd_fill_values: shape (A->m0, r_B) row-major. */ + permuted_dense *C_pd = (permuted_dense *) C; + size_t gather_size = A->m0 * r_B; + if (gather_size > C_pd->dwork_size) + { + free(C_pd->dwork); + C_pd->dwork_size = gather_size; + C_pd->dwork = (double *) SP_CALLOC(gather_size, sizeof(double)); + } + return C; +} + +/* No-d BTA fill for the legacy CSR-pd kernel. */ +void BTA_csr_pd_fill_values(const CSR_matrix *B_csr, const permuted_dense *A, + permuted_dense *C) +{ + int m0 = A->m0; + int dn_A = A->n0; + int r_B = C->m0; + + if (r_B == 0 || m0 == 0) + { + /* Output dense block is empty; nothing to fill. */ + return; + } + + /* Use C->row_inv (pre-built by new_permuted_dense) as row_inv_out and + C->dwork as B_sub_dense; both are owned by C. dwork is sized at alloc + time to cover m0 * r_B; only that prefix is touched. 
*/ + double *B_sub_dense = C->dwork; + size_t used = m0 * r_B; + memset(B_sub_dense, 0, used * sizeof(double)); + + for (int kk = 0; kk < m0; kk++) + { + int row = A->row_perm[kk]; + for (int e = B_csr->p[row]; e < B_csr->p[row + 1]; e++) + { + int i = B_csr->i[e]; + int ii = C->row_inv[i]; + if (ii >= 0) + { + B_sub_dense[kk * r_B + ii] = B_csr->x[e]; + } + } + } + + /* C->X = B_sub_dense^T @ X_A */ + cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, r_B, dn_A, m0, 1.0, + B_sub_dense, r_B, A->X, dn_A, 0.0, C->X, dn_A); +} + +/* BTDA variant: C->X = B_sub_dense^T diag(d) X_A. Folds d into the scatter + step. d may be NULL (treated as identity). */ +void BTDA_csr_pd_fill_values(const CSR_matrix *B_csr, const double *d, + const permuted_dense *A, permuted_dense *C) +{ + int m0 = A->m0; + int dn_A = A->n0; + int r_B = C->m0; + + if (r_B == 0 || m0 == 0) + { + return; + } + + double *B_sub_dense = C->dwork; + size_t used = m0 * r_B; + memset(B_sub_dense, 0, used * sizeof(double)); + + for (int kk = 0; kk < m0; kk++) + { + int row = A->row_perm[kk]; + double dk = d ? 
d[row] : 1.0; + for (int e = B_csr->p[row]; e < B_csr->p[row + 1]; e++) + { + int i = B_csr->i[e]; + int ii = C->row_inv[i]; + if (ii >= 0) + { + B_sub_dense[kk * r_B + ii] = dk * B_csr->x[e]; + } + } + } + + cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, r_B, dn_A, m0, 1.0, + B_sub_dense, r_B, A->X, dn_A, 0.0, C->X, dn_A); +} diff --git a/src/problem.c b/src/problem.c index 691921a..e462fa4 100644 --- a/src/problem.c +++ b/src/problem.c @@ -83,9 +83,10 @@ static void problem_lagrange_hess_fill_sparsity(problem *prob, int *iwork) int *cols = iwork; int *col_to_pos = iwork; /* reused after qsort */ int nnz = 0; - CSR_Matrix *H_obj = prob->objective->wsum_hess; - CSR_Matrix *H_c; - CSR_Matrix *H = prob->lagrange_hessian; + CSR_matrix *H_obj = + prob->objective->wsum_hess->to_csr(prob->objective->wsum_hess); + CSR_matrix *H_c; + CSR_matrix *H = prob->lagrange_hessian; H->p[0] = 0; // ---------------------------------------------------------------------- @@ -100,7 +101,7 @@ static void problem_lagrange_hess_fill_sparsity(problem *prob, int *iwork) /* gather columns from constraint hessians */ for (int c_idx = 0; c_idx < prob->n_constraints; c_idx++) { - H_c = constrs[c_idx]->wsum_hess; + H_c = constrs[c_idx]->wsum_hess->to_csr(constrs[c_idx]->wsum_hess); int c_len = H_c->p[row + 1] - H_c->p[row]; memcpy(cols + count, H_c->i + H_c->p[row], c_len * sizeof(int)); count += c_len; @@ -146,7 +147,7 @@ static void problem_lagrange_hess_fill_sparsity(problem *prob, int *iwork) /* map constraint hessian entries */ for (int c_idx = 0; c_idx < prob->n_constraints; c_idx++) { - H_c = constrs[c_idx]->wsum_hess; + H_c = constrs[c_idx]->wsum_hess->to_csr(constrs[c_idx]->wsum_hess); for (int row = 0; row < H->m; row++) { for (int idx = H->p[row]; idx < H->p[row + 1]; idx++) @@ -176,37 +177,39 @@ void problem_init_jacobian(problem *prob) { expr *c = prob->constraints[i]; jacobian_init(c); - nnz += c->jacobian->nnz; + CSR_matrix *Jc = c->jacobian->to_csr(c->jacobian); + nnz += 
Jc->nnz; if (c->is_affine(c)) { - prob->stats.nnz_affine += c->jacobian->nnz; + prob->stats.nnz_affine += Jc->nnz; } else { - prob->stats.nnz_nonlinear += c->jacobian->nnz; + prob->stats.nnz_nonlinear += Jc->nnz; } } - prob->jacobian = new_csr_matrix(prob->total_constraint_size, prob->n_vars, nnz); + prob->jacobian = new_CSR_matrix(prob->total_constraint_size, prob->n_vars, nnz); /* set sparsity pattern of jacobian */ - CSR_Matrix *H = prob->jacobian; + CSR_matrix *H = prob->jacobian; H->p[0] = 0; int row_offset = 0; int nnz_offset = 0; for (int i = 0; i < prob->n_constraints; i++) { expr *c = prob->constraints[i]; + CSR_matrix *Jc = c->jacobian->to_csr(c->jacobian); - for (int r = 1; r <= c->jacobian->m; r++) + for (int r = 1; r <= Jc->m; r++) { - H->p[row_offset + r] = nnz_offset + c->jacobian->p[r]; + H->p[row_offset + r] = nnz_offset + Jc->p[r]; } - memcpy(H->i + nnz_offset, c->jacobian->i, c->jacobian->nnz * sizeof(int)); - row_offset += c->jacobian->m; - nnz_offset += c->jacobian->nnz; + memcpy(H->i + nnz_offset, Jc->i, Jc->nnz * sizeof(int)); + row_offset += Jc->m; + nnz_offset += Jc->nnz; } assert(nnz_offset == nnz); @@ -231,7 +234,7 @@ void problem_init_hessian(problem *prob) nnz += prob->constraints[i]->wsum_hess->nnz; } - prob->lagrange_hessian = new_csr_matrix(prob->n_vars, prob->n_vars, nnz); + prob->lagrange_hessian = new_CSR_matrix(prob->n_vars, prob->n_vars, nnz); memset(prob->lagrange_hessian->x, 0, nnz * sizeof(double)); /* affine shortcut */ prob->stats.nnz_hessian = nnz; prob->hess_idx_map = (int *) SP_MALLOC(nnz * sizeof(int)); @@ -248,7 +251,7 @@ void problem_init_jacobian_coo(problem *prob) problem_init_jacobian(prob); Timer timer; clock_gettime(CLOCK_MONOTONIC, &timer.start); - prob->jacobian_coo = new_coo_matrix(prob->jacobian); + prob->jacobian_coo = new_COO_matrix(prob->jacobian); clock_gettime(CLOCK_MONOTONIC, &timer.end); prob->stats.time_init_derivatives += GET_ELAPSED_SECONDS(timer); } @@ -259,7 +262,7 @@ void 
problem_init_hessian_coo_lower_triangular(problem *prob) Timer timer; clock_gettime(CLOCK_MONOTONIC, &timer.start); prob->lagrange_hessian_coo = - new_coo_matrix_lower_triangular(prob->lagrange_hessian); + new_COO_matrix_lower_triangular(prob->lagrange_hessian); clock_gettime(CLOCK_MONOTONIC, &timer.end); prob->stats.time_init_derivatives += GET_ELAPSED_SECONDS(timer); } @@ -342,10 +345,10 @@ void free_problem(problem *prob) /* Free allocated arrays */ free(prob->constraint_values); free(prob->gradient_values); - free_csr_matrix(prob->jacobian); - free_csr_matrix(prob->lagrange_hessian); - free_coo_matrix(prob->jacobian_coo); - free_coo_matrix(prob->lagrange_hessian_coo); + free_CSR_matrix(prob->jacobian); + free_CSR_matrix(prob->lagrange_hessian); + free_COO_matrix(prob->jacobian_coo); + free_COO_matrix(prob->lagrange_hessian_coo); free(prob->hess_idx_map); /* Release expression references (decrements refcount) */ @@ -466,7 +469,7 @@ void problem_gradient(problem *prob) /* copy sparse jacobian to dense gradient */ memset(prob->gradient_values, 0, prob->n_vars * sizeof(double)); - CSR_Matrix *jac = prob->objective->jacobian; + CSR_matrix *jac = prob->objective->jacobian->to_csr(prob->objective->jacobian); for (int k = jac->p[0]; k < jac->p[1]; k++) { prob->gradient_values[jac->i[k]] = jac->x[k]; @@ -482,13 +485,12 @@ void problem_jacobian(problem *prob) clock_gettime(CLOCK_MONOTONIC, &timer.start); bool first_call = !prob->jacobian_called; - CSR_Matrix *J = prob->jacobian; + CSR_matrix *J = prob->jacobian; int nnz_offset = 0; for (int i = 0; i < prob->n_constraints; i++) { expr *c = prob->constraints[i]; - if (!first_call && c->is_affine(c)) { /* skip evaluation for affine constraints after first call */ @@ -537,21 +539,22 @@ void problem_hessian(problem *prob, double obj_w, const double *w) // ------------------------------------------------------------------------ // assemble Lagrange hessian using index map // 
------------------------------------------------------------------------ - CSR_Matrix *H = prob->lagrange_hessian; + CSR_matrix *H = prob->lagrange_hessian; int *idx_map = prob->hess_idx_map; /* zero out hessian before adding contribution from obj and constraints */ memset(H->x, 0, H->nnz * sizeof(double)); /* accumulate objective function */ - accumulator(obj->wsum_hess, idx_map, H->x); + accumulator(obj->wsum_hess->x, obj->wsum_hess->nnz, idx_map, H->x); offset = obj->wsum_hess->nnz; /* accumulate constraint functions */ for (int i = 0; i < prob->n_constraints; i++) { - accumulator(constrs[i]->wsum_hess, idx_map + offset, H->x); - offset += constrs[i]->wsum_hess->nnz; + matrix *c_hess = constrs[i]->wsum_hess; + accumulator(c_hess->x, c_hess->nnz, idx_map + offset, H->x); + offset += c_hess->nnz; } clock_gettime(CLOCK_MONOTONIC, &timer.end); diff --git a/src/utils/COO_Matrix.c b/src/utils/COO_matrix.c similarity index 87% rename from src/utils/COO_Matrix.c rename to src/utils/COO_matrix.c index 727be25..8602c09 100644 --- a/src/utils/COO_Matrix.c +++ b/src/utils/COO_matrix.c @@ -15,14 +15,14 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "utils/COO_Matrix.h" +#include "utils/COO_matrix.h" #include "utils/tracked_alloc.h" #include #include -COO_Matrix *new_coo_matrix(const CSR_Matrix *A) +COO_matrix *new_COO_matrix(const CSR_matrix *A) { - COO_Matrix *coo = (COO_Matrix *) SP_MALLOC(sizeof(COO_Matrix)); + COO_matrix *coo = (COO_matrix *) SP_MALLOC(sizeof(COO_matrix)); coo->m = A->m; coo->n = A->n; coo->nnz = A->nnz; @@ -45,7 +45,7 @@ COO_Matrix *new_coo_matrix(const CSR_Matrix *A) return coo; } -COO_Matrix *new_coo_matrix_lower_triangular(const CSR_Matrix *A) +COO_matrix *new_COO_matrix_lower_triangular(const CSR_matrix *A) { /* Pass 1: count lower-triangular entries (col <= row) */ int count = 0; @@ -60,7 +60,7 @@ COO_Matrix *new_coo_matrix_lower_triangular(const CSR_Matrix *A) } } - COO_Matrix *coo = (COO_Matrix *) SP_MALLOC(sizeof(COO_Matrix)); + COO_matrix *coo = (COO_matrix *) SP_MALLOC(sizeof(COO_matrix)); coo->m = A->m; coo->n = A->n; coo->nnz = count; @@ -89,7 +89,7 @@ COO_Matrix *new_coo_matrix_lower_triangular(const CSR_Matrix *A) return coo; } -void refresh_lower_triangular_coo(COO_Matrix *coo, const double *vals) +void refresh_lower_triangular_coo(COO_matrix *coo, const double *vals) { for (int i = 0; i < coo->nnz; i++) { @@ -97,7 +97,7 @@ void refresh_lower_triangular_coo(COO_Matrix *coo, const double *vals) } } -void free_coo_matrix(COO_Matrix *matrix) +void free_COO_matrix(COO_matrix *matrix) { if (matrix) { diff --git a/src/utils/CSC_Matrix.c b/src/utils/CSC_matrix.c similarity index 90% rename from src/utils/CSC_Matrix.c rename to src/utils/CSC_matrix.c index 4718040..08eec98 100644 --- a/src/utils/CSC_Matrix.c +++ b/src/utils/CSC_matrix.c @@ -15,16 +15,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "utils/CSC_Matrix.h" +#include "utils/CSC_matrix.h" #include "utils/iVec.h" #include "utils/tracked_alloc.h" #include #include #include -CSC_Matrix *new_csc_matrix(int m, int n, int nnz) +CSC_matrix *new_CSC_matrix(int m, int n, int nnz) { - CSC_Matrix *matrix = (CSC_Matrix *) SP_MALLOC(sizeof(CSC_Matrix)); + CSC_matrix *matrix = (CSC_matrix *) SP_MALLOC(sizeof(CSC_matrix)); if (!matrix) return NULL; matrix->p = (int *) SP_MALLOC((n + 1) * sizeof(int)); @@ -47,7 +47,7 @@ CSC_Matrix *new_csc_matrix(int m, int n, int nnz) return matrix; } -void free_csc_matrix(CSC_Matrix *matrix) +void free_CSC_matrix(CSC_matrix *matrix) { if (matrix) { @@ -58,7 +58,7 @@ void free_csc_matrix(CSC_Matrix *matrix) } } -CSR_Matrix *ATA_alloc(const CSC_Matrix *A) +CSR_matrix *ATA_alloc(const CSC_matrix *A) { /* A is m x n, A^T A is n x n */ int n = A->n; @@ -102,7 +102,7 @@ CSR_Matrix *ATA_alloc(const CSC_Matrix *A) } /* Allocate C and symmetrize it */ - CSR_Matrix *C = new_csr_matrix(n, n, nnz); + CSR_matrix *C = new_CSR_matrix(n, n, nnz); symmetrize_csr(Cp, Ci->data, n, C); /* free workspace */ @@ -169,7 +169,7 @@ static inline double sparse_wdot(const double *a_x, const int *a_i, int a_nnz, return sum; } -void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C) +void ATDA_fill_values(const CSC_matrix *A, const double *d, CSR_matrix *C) { int j, ii, jj; for (ii = 0; ii < C->m; ii++) @@ -203,9 +203,9 @@ void ATDA_fill_values(const CSC_Matrix *A, const double *d, CSR_Matrix *C) } } -CSC_Matrix *csr_to_csc_alloc(const CSR_Matrix *A, int *iwork) +CSC_matrix *csr_to_csc_alloc(const CSR_matrix *A, int *iwork) { - CSC_Matrix *C = new_csc_matrix(A->m, A->n, A->nnz); + CSC_matrix *C = new_CSC_matrix(A->m, A->n, A->nnz); int i, j; int *count = iwork; @@ -247,7 +247,7 @@ CSC_Matrix *csr_to_csc_alloc(const CSR_Matrix *A, int *iwork) return C; } -void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork) +void csr_to_csc_fill_values(const CSR_matrix 
*A, CSC_matrix *C, int *iwork) { int i, j; int *count = iwork; @@ -266,9 +266,9 @@ void csr_to_csc_fill_values(const CSR_Matrix *A, CSC_Matrix *C, int *iwork) } } -CSR_Matrix *csc_to_csr_alloc(const CSC_Matrix *A, int *iwork) +CSR_matrix *csc_to_csr_alloc(const CSC_matrix *A, int *iwork) { - CSR_Matrix *C = new_csr_matrix(A->m, A->n, A->nnz); + CSR_matrix *C = new_CSR_matrix(A->m, A->n, A->nnz); int i, j; int *count = iwork; @@ -312,7 +312,7 @@ CSR_Matrix *csc_to_csr_alloc(const CSC_Matrix *A, int *iwork) return C; } -void csc_to_csr_fill_values(const CSC_Matrix *A, CSR_Matrix *C, int *iwork) +void csc_to_csr_fill_values(const CSC_matrix *A, CSR_matrix *C, int *iwork) { int i, j; int *count = iwork; @@ -332,7 +332,7 @@ void csc_to_csr_fill_values(const CSC_Matrix *A, CSR_Matrix *C, int *iwork) } } -CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B) +CSR_matrix *BTA_alloc(const CSC_matrix *A, const CSC_matrix *B) { /* A is m x n, B is m x p, C = B^T A is p x n */ int n = A->n; @@ -378,7 +378,7 @@ CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B) } /* Allocate C */ - CSR_Matrix *C = new_csr_matrix(p, n, nnz); + CSR_matrix *C = new_CSR_matrix(p, n, nnz); memcpy(C->p, Cp, (p + 1) * sizeof(int)); memcpy(C->i, Ci->data, nnz * sizeof(int)); @@ -389,7 +389,7 @@ CSR_Matrix *BTA_alloc(const CSC_Matrix *A, const CSC_Matrix *B) return C; } -void yTA_fill_values(const CSC_Matrix *A, const double *y, CSR_Matrix *C) +void yTA_fill_values(const CSC_matrix *A, const double *y, CSR_matrix *C) { for (int col = 0; col < A->n; col++) { @@ -413,9 +413,9 @@ void yTA_fill_values(const CSC_Matrix *A, const double *y, CSR_Matrix *C) } } -/* computes C = B^T * D * A in CSR */ -void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d, - CSR_Matrix *C) +/* computes C = B^T * D * A in CSR_matrix */ +void BTDA_fill_values(const CSC_matrix *A, const CSC_matrix *B, const double *d, + CSR_matrix *C) { int i, j, jj; for (i = 0; i < C->m; i++) @@ 
-446,7 +446,7 @@ void BTDA_fill_values(const CSC_Matrix *A, const CSC_Matrix *B, const double *d, * faster when Q is dense, since it touches each Q entry exactly once. * The sparse_dot approach below is simpler but redundantly scans * column j of A for each nonzero row of C. */ -void BA_fill_values(const CSR_Matrix *Q, const CSC_Matrix *A, CSC_Matrix *C) +void BA_fill_values(const CSR_matrix *Q, const CSC_matrix *A, CSC_matrix *C) { /* fill values of C = Q * A, given the sparsity pattern of C. */ int i, j, ii; @@ -467,10 +467,10 @@ void BA_fill_values(const CSR_Matrix *Q, const CSC_Matrix *A, CSC_Matrix *C) } } -CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A) +CSC_matrix *symBA_alloc(const CSR_matrix *B, const CSC_matrix *A) { /* Allocate C = B * A (sparsity only). B must be symmetric. - * B is CSR (m x m), A is CSC (m x n), C is CSC (m x n). + * B is in CSR format (m x m), A is in CSC format (m x n), C is in CSC format (m x n). * * Column j of C is B * a_j = sum_k A_{k,j} B[:, k], so the nonzero * rows of column j of C are the union of the nonzero rows of B[:, k]. 
@@ -525,7 +525,7 @@ CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A) /* allocate C and copy the computed structure */ int total_nnz = Cp[n]; - CSC_Matrix *C = new_csc_matrix(m, n, total_nnz); + CSC_matrix *C = new_CSC_matrix(m, n, total_nnz); memcpy(C->p, Cp, (n + 1) * sizeof(int)); memcpy(C->i, Ci->data, total_nnz * sizeof(int)); @@ -536,7 +536,7 @@ CSC_Matrix *symBA_alloc(const CSR_Matrix *B, const CSC_Matrix *A) return C; } -int count_nonzero_cols_csc(const CSC_Matrix *A) +int count_nonzero_cols_csc(const CSC_matrix *A) { int count = 0; for (int j = 0; j < A->n; j++) diff --git a/src/utils/CSR_Matrix.c b/src/utils/CSR_matrix.c similarity index 86% rename from src/utils/CSR_Matrix.c rename to src/utils/CSR_matrix.c index c52b97d..1123068 100644 --- a/src/utils/CSR_Matrix.c +++ b/src/utils/CSR_matrix.c @@ -15,7 +15,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" #include "utils/int_double_pair.h" #include "utils/tracked_alloc.h" #include "utils/utils.h" @@ -25,9 +25,9 @@ #include #include -CSR_Matrix *new_csr_matrix(int m, int n, int nnz) +CSR_matrix *new_CSR_matrix(int m, int n, int nnz) { - CSR_Matrix *matrix = (CSR_Matrix *) SP_MALLOC(sizeof(CSR_Matrix)); + CSR_matrix *matrix = (CSR_matrix *) SP_MALLOC(sizeof(CSR_matrix)); matrix->p = (int *) SP_CALLOC(m + 1, sizeof(int)); matrix->i = (int *) SP_CALLOC(nnz, sizeof(int)); matrix->x = (double *) SP_MALLOC(nnz * sizeof(double)); @@ -37,24 +37,24 @@ CSR_Matrix *new_csr_matrix(int m, int n, int nnz) return matrix; } -CSR_Matrix *new_csr(const CSR_Matrix *A) +CSR_matrix *new_csr(const CSR_matrix *A) { - CSR_Matrix *copy = new_csr_matrix(A->m, A->n, A->nnz); + CSR_matrix *copy = new_CSR_matrix(A->m, A->n, A->nnz); memcpy(copy->p, A->p, (A->m + 1) * sizeof(int)); memcpy(copy->i, A->i, A->nnz * sizeof(int)); memcpy(copy->x, A->x, A->nnz * sizeof(double)); return copy; } 
-CSR_Matrix *new_csr_copy_sparsity(const CSR_Matrix *A) +CSR_matrix *new_csr_copy_sparsity(const CSR_matrix *A) { - CSR_Matrix *copy = new_csr_matrix(A->m, A->n, A->nnz); + CSR_matrix *copy = new_CSR_matrix(A->m, A->n, A->nnz); memcpy(copy->p, A->p, (A->m + 1) * sizeof(int)); memcpy(copy->i, A->i, A->nnz * sizeof(int)); return copy; } -void free_csr_matrix(CSR_Matrix *matrix) +void free_CSR_matrix(CSR_matrix *matrix) { if (matrix) { @@ -65,7 +65,7 @@ void free_csr_matrix(CSR_Matrix *matrix) } } -void copy_csr_matrix(const CSR_Matrix *A, CSR_Matrix *C) +void copy_CSR_matrix(const CSR_matrix *A, CSR_matrix *C) { C->m = A->m; C->n = A->n; @@ -75,7 +75,7 @@ void copy_csr_matrix(const CSR_Matrix *A, CSR_Matrix *C) memcpy(C->x, A->x, A->nnz * sizeof(double)); } -void Ax_csr(const CSR_Matrix *A, const double *x, double *y, int col_offset) +void Ax_csr(const CSR_matrix *A, const double *x, double *y, int col_offset) { for (int row = 0; row < A->m; row++) { @@ -88,7 +88,7 @@ void Ax_csr(const CSR_Matrix *A, const double *x, double *y, int col_offset) } } -int count_nonzero_cols(const CSR_Matrix *A, bool *col_nz) +int count_nonzero_cols(const CSR_matrix *A, bool *col_nz) { for (int row = 0; row < A->m; row++) { @@ -120,7 +120,7 @@ void insert_idx(int idx, int *arr, int len) arr[j] = idx; } -void DA_fill_values(const double *d, const CSR_Matrix *A, CSR_Matrix *C) +void DA_fill_values(const double *d, const CSR_matrix *A, CSR_matrix *C) { memcpy(C->x, A->x, A->nnz * sizeof(double)); @@ -133,9 +133,9 @@ void DA_fill_values(const double *d, const CSR_Matrix *A, CSR_Matrix *C) } } -CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork) +CSR_matrix *transpose(const CSR_matrix *A, int *iwork) { - CSR_Matrix *AT = new_csr_matrix(A->n, A->m, A->nnz); + CSR_matrix *AT = new_CSR_matrix(A->n, A->m, A->nnz); int i, j; int *count = iwork; @@ -178,10 +178,10 @@ CSR_Matrix *transpose(const CSR_Matrix *A, int *iwork) return AT; } -CSR_Matrix *AT_alloc(const CSR_Matrix *A, int *iwork) 
+CSR_matrix *AT_alloc(const CSR_matrix *A, int *iwork) { /* Allocate A^T and compute sparsity pattern without filling values */ - CSR_Matrix *AT = new_csr_matrix(A->n, A->m, A->nnz); + CSR_matrix *AT = new_CSR_matrix(A->n, A->m, A->nnz); int i, j; int *count = iwork; @@ -223,7 +223,7 @@ CSR_Matrix *AT_alloc(const CSR_Matrix *A, int *iwork) return AT; } -void AT_fill_values(const CSR_Matrix *A, CSR_Matrix *AT, int *iwork) +void AT_fill_values(const CSR_matrix *A, CSR_matrix *AT, int *iwork) { /* Fill values of A^T given sparsity pattern is already computed */ int i, j; @@ -241,7 +241,7 @@ void AT_fill_values(const CSR_Matrix *A, CSR_Matrix *AT, int *iwork) } } -double csr_get_value(const CSR_Matrix *A, int row, int col) +double csr_get_value(const CSR_matrix *A, int row, int col) { for (int j = A->p[row]; j < A->p[row + 1]; j++) { @@ -253,7 +253,7 @@ double csr_get_value(const CSR_Matrix *A, int row, int col) return 0.0; } -void symmetrize_csr(const int *Ap, const int *Ai, int m, CSR_Matrix *C) +void symmetrize_csr(const int *Ap, const int *Ai, int m, CSR_matrix *C) { int i, j, col; diff --git a/src/utils/CSR_sum.c b/src/utils/CSR_sum.c index 0e5d4cf..286fd38 100644 --- a/src/utils/CSR_sum.c +++ b/src/utils/CSR_sum.c @@ -16,7 +16,7 @@ * limitations under the License. 
*/ #include "utils/CSR_sum.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" #include "utils/int_double_pair.h" #include "utils/tracked_alloc.h" #include "utils/utils.h" @@ -24,7 +24,7 @@ #include #include -void sum_csr_alloc(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C) +void sum_csr_alloc(const CSR_matrix *A, const CSR_matrix *B, CSR_matrix *C) { /* A and B must be different from C */ assert(A != C && B != C); @@ -81,7 +81,7 @@ void sum_csr_alloc(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C) C->p[A->m] = C->nnz; } -void sum_csr_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C) +void sum_csr_fill_values(const CSR_matrix *A, const CSR_matrix *B, CSR_matrix *C) { /* Assumes C->p and C->i already contain the sparsity pattern of A+B. Fills only C->x accordingly. */ @@ -113,8 +113,8 @@ void sum_csr_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C } } -void sum_scaled_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix *B, - CSR_Matrix *C, const double *d1, +void sum_scaled_csr_matrices_fill_values(const CSR_matrix *A, const CSR_matrix *B, + CSR_matrix *C, const double *d1, const double *d2) { /* Assumes C->p and C->i already contain the sparsity pattern of A+B. 
@@ -148,7 +148,7 @@ void sum_scaled_csr_matrices_fill_values(const CSR_Matrix *A, const CSR_Matrix * } /* iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz */ -void sum_block_of_rows_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, +void sum_block_of_rows_csr_alloc(const CSR_matrix *A, CSR_matrix *C, int row_block_size, int *iwork, int *idx_map) { assert(A->m % row_block_size == 0); @@ -220,7 +220,7 @@ void sum_block_of_rows_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, } /* iwork must have size max(A->n, A->nnz), and idx_map must have size A->nnz */ -void sum_evenly_spaced_rows_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, +void sum_evenly_spaced_rows_csr_alloc(const CSR_matrix *A, CSR_matrix *C, int row_spacing, int *iwork, int *idx_map) { assert(C->m == row_spacing); @@ -285,16 +285,16 @@ void sum_evenly_spaced_rows_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, } } -void accumulator(const CSR_Matrix *A, const int *idx_map, double *out) +void accumulator(const double *vals, int nnz, const int *idx_map, double *out) { /* don't forget to initialize accumulator to 0 before calling this */ - for (int j = 0; j < A->nnz; j++) + for (int j = 0; j < nnz; j++) { - out[idx_map[j]] += A->x[j]; + out[idx_map[j]] += vals[j]; } } -void accumulator_with_spacing(const CSR_Matrix *A, const int *idx_map, double *out, +void accumulator_with_spacing(const CSR_matrix *A, const int *idx_map, double *out, int spacing) { /* don't forget to initialze accumulator to 0 before calling this */ @@ -307,7 +307,7 @@ void accumulator_with_spacing(const CSR_Matrix *A, const int *idx_map, double *o } } -void sum_all_rows_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, int *iwork, +void sum_all_rows_csr_alloc(const CSR_matrix *A, CSR_matrix *C, int *iwork, int *idx_map) { // ------------------------------------------------------------------- @@ -354,23 +354,23 @@ void sum_all_rows_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, int *iwork, /* * Sums evenly spaced rows from A into a 
single row in C and fills an index map. - * A: input CSR matrix - * C: output CSR matrix (must have m=1) + * A: input CSR_matrix matrix + * C: output CSR_matrix matrix (must have m=1) * spacing: row spacing * iwork: workspace of size at least max(A->n, A->nnz) * idx_map: output index map, size at least A->nnz */ -CSR_Matrix *sum_4_csr_alloc(const CSR_Matrix *A, const CSR_Matrix *B, - const CSR_Matrix *C, const CSR_Matrix *D, +CSR_matrix *sum_4_csr_alloc(const CSR_matrix *A, const CSR_matrix *B, + const CSR_matrix *C, const CSR_matrix *D, int *idx_maps[4]) { - const CSR_Matrix *inputs[4] = {A, B, C, D}; + const CSR_matrix *inputs[4] = {A, B, C, D}; int m = A->m; int n = A->n; int nnz_ub = A->nnz + B->nnz + C->nnz + D->nnz; /* allocate output and index maps */ - CSR_Matrix *out = new_csr_matrix(m, n, nnz_ub); + CSR_matrix *out = new_CSR_matrix(m, n, nnz_ub); for (int k = 0; k < 4; k++) { idx_maps[k] = (int *) SP_MALLOC(inputs[k]->nnz * sizeof(int)); @@ -430,7 +430,7 @@ CSR_Matrix *sum_4_csr_alloc(const CSR_Matrix *A, const CSR_Matrix *B, return out; } -void sum_spaced_rows_into_row_csr_alloc(const CSR_Matrix *A, CSR_Matrix *C, +void sum_spaced_rows_into_row_csr_alloc(const CSR_matrix *A, CSR_matrix *C, int spacing, int *iwork, int *idx_map) { assert(C->m == 1); diff --git a/src/utils/dense_matrix.c b/src/utils/dense_matrix.c deleted file mode 100644 index 5c628c2..0000000 --- a/src/utils/dense_matrix.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright 2026 Daniel Cederberg and William Zhang - * - * This file is part of the SparseDiffEngine project. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include "utils/dense_matrix.h" -#include "utils/cblas_wrapper.h" -#include "utils/linalg_dense_sparse_matmuls.h" -#include "utils/tracked_alloc.h" -#include -#include - -static void dense_block_left_mult_vec(const Matrix *A, const double *x, double *y, - int p) -{ - const Dense_Matrix *dm = (const Dense_Matrix *) A; - int m = dm->base.m; - int n = dm->base.n; - - /* y = kron(I_p, A) @ x via a single dgemm call: - Treat x as n x p (column-major blocks) and y as m x p. - But x and y are stored as p blocks of length n and m - respectively (i.e. block-interleaved). This is the same as - treating them as row-major matrices of shape p x n and - p x m, so: - y (p x m) = x (p x n) * A^T (n x m), all row-major. - cblas with RowMajor: C = alpha * A * B + beta * C - where A = x (p x n), B = A^T (n x m), C = y (p x m). 
*/ - cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, p, m, n, 1.0, x, n, dm->x, - n, 0.0, y, m); -} - -static void dense_update_values(Matrix *self, const double *new_values) -{ - Dense_Matrix *dm = (Dense_Matrix *) self; - memcpy(dm->x, new_values, dm->base.m * dm->base.n * sizeof(double)); -} - -static void dense_free(Matrix *A) -{ - Dense_Matrix *dm = (Dense_Matrix *) A; - free(dm->x); - free(dm->work); - free(dm); -} - -Matrix *new_dense_matrix(int m, int n, const double *data) -{ - Dense_Matrix *dm = (Dense_Matrix *) SP_CALLOC(1, sizeof(Dense_Matrix)); - dm->base.m = m; - dm->base.n = n; - dm->base.block_left_mult_vec = dense_block_left_mult_vec; - dm->base.block_left_mult_sparsity = I_kron_A_alloc; - dm->base.block_left_mult_values = I_kron_A_fill_values; - dm->base.update_values = dense_update_values; - dm->base.free_fn = dense_free; - dm->x = (double *) SP_MALLOC(m * n * sizeof(double)); - if (data != NULL) - { - memcpy(dm->x, data, m * n * sizeof(double)); - } - dm->work = (double *) SP_MALLOC(n * sizeof(double)); - return &dm->base; -} - -Matrix *dense_matrix_trans(const Dense_Matrix *A) -{ - int m = A->base.m; - int n = A->base.n; - double *AT_x = (double *) SP_MALLOC(m * n * sizeof(double)); - - A_transpose(AT_x, A->x, m, n); - - Matrix *result = new_dense_matrix(n, m, AT_x); - free(AT_x); - return result; -} - -void A_transpose(double *AT, const double *A, int m, int n) -{ - for (int i = 0; i < m; i++) - { - for (int j = 0; j < n; j++) - { - AT[j * m + i] = A[i * n + j]; - } - } -} diff --git a/src/utils/linalg_dense_sparse_matmuls.c b/src/utils/linalg_dense_sparse_matmuls.c index 74cd5fe..22f133d 100644 --- a/src/utils/linalg_dense_sparse_matmuls.c +++ b/src/utils/linalg_dense_sparse_matmuls.c @@ -15,21 +15,21 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -#include "utils/CSC_Matrix.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" #include "utils/cblas_wrapper.h" -#include "utils/dense_matrix.h" #include "utils/iVec.h" +#include "utils/linalg_dense_sparse_matmuls.h" #include "utils/tracked_alloc.h" #include #include #include /* --------------------------------------------------------------- - * C = (I_p kron A) @ J via the polymorphic Matrix interface. - * A is dense m x n, J is (n*p) x k CSC, C is (m*p) x k CSC. + * C = (I_p kron A) @ J via the polymorphic matrix interface. + * A is dense m x n, J is (n*p) x k CSC_matrix, C is (m*p) x k CSC_matrix. * --------------------------------------------------------------- */ -CSC_Matrix *I_kron_A_alloc(const Matrix *A, const CSC_Matrix *J, int p) +CSC_matrix *I_kron_A_alloc(const matrix *A, const CSC_matrix *J, int p) { int m = A->m; int n = A->n; @@ -82,7 +82,7 @@ CSC_Matrix *I_kron_A_alloc(const Matrix *A, const CSC_Matrix *J, int p) Cp[j + 1] = Ci->len; } - CSC_Matrix *C = new_csc_matrix(m * p, J->n, Ci->len); + CSC_matrix *C = new_CSC_matrix(m * p, J->n, Ci->len); memcpy(C->p, Cp, (J->n + 1) * sizeof(int)); memcpy(C->i, Ci->data, Ci->len * sizeof(int)); free(Cp); @@ -91,16 +91,16 @@ CSC_Matrix *I_kron_A_alloc(const Matrix *A, const CSC_Matrix *J, int p) return C; } -void I_kron_A_fill_values(const Matrix *A, const CSC_Matrix *J, CSC_Matrix *C) +void I_kron_A_fill_values(const matrix *A, const CSC_matrix *J, CSC_matrix *C, + double *work) { - const Dense_Matrix *dm = (const Dense_Matrix *) A; - int m = dm->base.m; - int n = dm->base.n; + int m = A->m; + int n = A->n; int k = J->n; int i, j, s, block, block_start, block_end, start, end; - double *j_dense = dm->work; + double *j_dense = work; /* for each column of J (and C) */ for (j = 0; j < k; j++) @@ -131,7 +131,7 @@ void I_kron_A_fill_values(const Matrix *A, const CSC_Matrix *J, CSC_Matrix *C) /* Fast path: C column segment = val * A[:, row_in_block] */ int row_in_block 
= J->i[start] - block_start; double val = J->x[start]; - cblas_dcopy(m, dm->x + row_in_block, n, C->x + i, 1); + cblas_dcopy(m, A->x + row_in_block, n, C->x + i, 1); if (val != 1.0) { cblas_dscal(m, val, C->x + i, 1); @@ -147,7 +147,7 @@ void I_kron_A_fill_values(const Matrix *A, const CSC_Matrix *J, CSC_Matrix *C) j_dense[J->i[s] - block_start] = J->x[s]; } - cblas_dgemv(CblasRowMajor, CblasNoTrans, m, n, 1.0, dm->x, n, + cblas_dgemv(CblasRowMajor, CblasNoTrans, m, n, 1.0, A->x, n, j_dense, 1, 0.0, C->x + i, 1); } } @@ -156,9 +156,9 @@ void I_kron_A_fill_values(const Matrix *A, const CSC_Matrix *J, CSC_Matrix *C) /* --------------------------------------------------------------- * C = (Y^T kron I_m) @ J - * Y is k x n (col-major), J is (m*k) x p CSC, C is (m*n) x p CSR + * Y is k x n (col-major), J is (m*k) x p CSC_matrix, C is (m*n) x p CSR_matrix * --------------------------------------------------------------- */ -CSR_Matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_Matrix *J) +CSR_matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_matrix *J) { (void) k; /* C has n blocks of m rows. 
All rows at the same position within @@ -198,7 +198,7 @@ CSR_Matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_Matrix *J) // --------------------------------------------------------------- // replicate sparsity pattern across blocks // --------------------------------------------------------------- - CSR_Matrix *C = new_csr_matrix(m * n, J->n, total_nnz); + CSR_matrix *C = new_CSR_matrix(m * n, J->n, total_nnz); int idx = 0; for (i = 0; i < m * n; i++) { @@ -219,8 +219,8 @@ CSR_Matrix *YT_kron_I_alloc(int m, int k, int n, const CSC_Matrix *J) return C; } -void YT_kron_I_fill_values(int m, int k, int n, const double *Y, const CSC_Matrix *J, - CSR_Matrix *C) +void YT_kron_I_fill_values(int m, int k, int n, const double *Y, const CSC_matrix *J, + CSR_matrix *C) { (void) n; assert(C->m == m * n); @@ -256,7 +256,7 @@ void YT_kron_I_fill_values(int m, int k, int n, const double *Y, const CSC_Matri } } -CSR_Matrix *I_kron_X_alloc(int m, int k, int n, const CSC_Matrix *J) +CSR_matrix *I_kron_X_alloc(int m, int k, int n, const CSC_matrix *J) { /* Step 1: for each block, find which columns of J have any * nonzero in row range [blk*k, blk*k + k). */ @@ -287,7 +287,7 @@ CSR_Matrix *I_kron_X_alloc(int m, int k, int n, const CSC_Matrix *J) /* Step 2: replicate each block's pattern for all m rows * within that block. 
*/ - CSR_Matrix *C = new_csr_matrix(m * n, J->n, total_nnz); + CSR_matrix *C = new_CSR_matrix(m * n, J->n, total_nnz); int idx = 0; for (i = 0; i < m * n; i++) { @@ -308,8 +308,8 @@ CSR_Matrix *I_kron_X_alloc(int m, int k, int n, const CSC_Matrix *J) return C; } -void I_kron_X_fill_values(int m, int k, int n, const double *X, const CSC_Matrix *J, - CSR_Matrix *C) +void I_kron_X_fill_values(int m, int k, int n, const double *X, const CSC_matrix *J, + CSR_matrix *C) { (void) n; assert(C->m == m * n); diff --git a/src/utils/linalg_sparse_matmuls.c b/src/utils/linalg_sparse_matmuls.c index 42ee992..7720f70 100644 --- a/src/utils/linalg_sparse_matmuls.c +++ b/src/utils/linalg_sparse_matmuls.c @@ -15,35 +15,17 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include "utils/CSC_Matrix.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" #include "utils/iVec.h" #include "utils/tracked_alloc.h" +#include "utils/utils.h" #include #include #include #include #include -static inline bool has_overlap(const int *a_idx, int a_len, const int *b_idx, - int b_len, int b_offset) -{ - int ai = 0, bi = 0; - while (ai < a_len && bi < b_len) - { - if (a_idx[ai] == b_idx[bi] - b_offset) return true; - if (a_idx[ai] < b_idx[bi] - b_offset) - { - ai++; - } - else - { - bi++; - } - } - return false; -} - /* Unweighted sparse dot product of two sorted index arrays */ static inline double sparse_dot(const double *a_x, const int *a_i, int a_nnz, const double *b_x, const int *b_i, int b_nnz, @@ -104,8 +86,8 @@ static inline double sparse_dot_offset(const double *a_x, const int *a_idx, return sum; } -CSC_Matrix *block_left_multiply_fill_sparsity(const CSR_Matrix *A, - const CSC_Matrix *J, int p) +CSC_matrix *block_left_multiply_fill_sparsity(const CSR_matrix *A, + const CSC_matrix *J, int p) { /* A is m x n, J is (n*p) x k, C is (m*p) x k */ int m = A->m; @@ -175,7 +157,7 @@ CSC_Matrix 
*block_left_multiply_fill_sparsity(const CSR_Matrix *A, Cp[j + 1] = Ci->len; } - CSC_Matrix *C = new_csc_matrix(m * p, J->n, Ci->len); + CSC_matrix *C = new_CSC_matrix(m * p, J->n, Ci->len); memcpy(C->p, Cp, (J->n + 1) * sizeof(int)); memcpy(C->i, Ci->data, Ci->len * sizeof(int)); free(Cp); @@ -184,8 +166,8 @@ CSC_Matrix *block_left_multiply_fill_sparsity(const CSR_Matrix *A, return C; } -void block_left_multiply_fill_values(const CSR_Matrix *A, const CSC_Matrix *J, - CSC_Matrix *C) +void block_left_multiply_fill_values(const CSR_matrix *A, const CSC_matrix *J, + CSC_matrix *C) { /* A is m x n, J is (n*p) x k, C is (m*p) x k */ int m = A->m; @@ -246,9 +228,9 @@ void block_left_multiply_fill_values(const CSR_Matrix *A, const CSC_Matrix *J, } } -/* Fill values of C = A @ B where A is CSR, B is CSC. */ -void csr_csc_matmul_fill_values(const CSR_Matrix *A, const CSC_Matrix *B, - CSR_Matrix *C) +/* Fill values of C = A @ B where A is CSR_matrix, B is CSC_matrix. */ +void csr_csc_matmul_fill_values(const CSR_matrix *A, const CSC_matrix *B, + CSR_matrix *C) { for (int i = 0; i < A->m; i++) { @@ -268,9 +250,9 @@ void csr_csc_matmul_fill_values(const CSR_Matrix *A, const CSC_Matrix *B, } } -/* C = A @ B where A is CSR (m x n), B is CSC (n x p). Result C is CSR (m x p) +/* C = A @ B where A is CSR_matrix (m x n), B is CSC_matrix (n x p). 
Result C is CSR_matrix (m x p) with precomputed sparsity pattern */ -CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B) +CSR_matrix *csr_csc_matmul_alloc(const CSR_matrix *A, const CSC_matrix *B) { int m = A->m; int p = B->n; @@ -304,7 +286,7 @@ CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B) Cp[i + 1] = nnz; } - CSR_Matrix *C = new_csr_matrix(m, p, nnz); + CSR_matrix *C = new_CSR_matrix(m, p, nnz); memcpy(C->p, Cp, (m + 1) * sizeof(int)); memcpy(C->i, Ci->data, nnz * sizeof(int)); free(Cp); @@ -317,7 +299,7 @@ CSR_Matrix *csr_csc_matmul_alloc(const CSR_Matrix *A, const CSC_Matrix *B) * y = [A @ x1; A @ x2; ...; A @ xp] where A is m x n and x is (n*p)-length vector. * x is split into p blocks of n elements each. */ -void block_left_multiply_vec(const struct CSR_Matrix *A, const double *x, double *y, +void block_left_multiply_vec(const struct CSR_matrix *A, const double *x, double *y, int p) { /* For each block */ diff --git a/src/utils/matrix_BTA.c b/src/utils/matrix_BTA.c new file mode 100644 index 0000000..cd4d815 --- /dev/null +++ b/src/utils/matrix_BTA.c @@ -0,0 +1,97 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the SparseDiffEngine project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ +#include "utils/matrix_BTA.h" + +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" +#include "utils/permuted_dense.h" +#include "utils/sparse_matrix.h" + +matrix *BTA_matrices_alloc(matrix *A, matrix *B) +{ + if (A->is_permuted_dense && B->is_permuted_dense) + { + return BTA_pd_pd_alloc((permuted_dense *) B, (permuted_dense *) A); + } + if (B->is_permuted_dense) + { + sparse_matrix *sm_A = (sparse_matrix *) A; + return BTA_pd_csc_alloc((permuted_dense *) B, sm_A->csc_cache); + } + if (A->is_permuted_dense) + { + sparse_matrix *sm_B = (sparse_matrix *) B; + return BTA_csc_pd_alloc(sm_B->csc_cache, (permuted_dense *) A); + } + + /* both sparse */ + sparse_matrix *sm_A = (sparse_matrix *) A; + sparse_matrix *sm_B = (sparse_matrix *) B; + CSR_matrix *C_csr = BTA_alloc(sm_A->csc_cache, sm_B->csc_cache); + return new_sparse_matrix(C_csr); +} + +void BTDA_matrices_fill_values(matrix *A, const double *d, matrix *B, matrix *C) +{ + if (A->is_permuted_dense && B->is_permuted_dense) + { + BTDA_pd_pd_fill_values((permuted_dense *) B, d, (permuted_dense *) A, + (permuted_dense *) C); + return; + } + if (B->is_permuted_dense) + { + sparse_matrix *sm_A = (sparse_matrix *) A; + BTDA_pd_csc_fill_values((permuted_dense *) B, d, sm_A->csc_cache, + (permuted_dense *) C); + return; + } + if (A->is_permuted_dense) + { + sparse_matrix *sm_B = (sparse_matrix *) B; + BTDA_csc_pd_fill_values(sm_B->csc_cache, d, (permuted_dense *) A, + (permuted_dense *) C); + return; + } + + /* both sparse */ + sparse_matrix *sm_A = (sparse_matrix *) A; + sparse_matrix *sm_B = (sparse_matrix *) B; + sparse_matrix *sm_C = (sparse_matrix *) C; + BTDA_fill_values(sm_A->csc_cache, sm_B->csc_cache, d, sm_C->csr); +} + +matrix *BA_pd_matrices_alloc(const permuted_dense *B, const matrix *A) +{ + if (A->is_permuted_dense) + { + return BA_pd_pd_alloc(B, (const permuted_dense *) A); + } + /* A is sparse — 
use the existing BA_pd_csc_* kernels. Ensure the + csc_cache structure exists at alloc time. */ + sparse_matrix *sm_A = (sparse_matrix *) A; + sparse_matrix_ensure_csc_cache(sm_A); + return BA_pd_csc_alloc(B, sm_A->csc_cache); +} + +void BA_pd_matrices_fill_values(const permuted_dense *B, const matrix *A, + permuted_dense *C) +{ + if (A->is_permuted_dense) + { + BA_pd_pd_fill_values(B, (const permuted_dense *) A, C); + return; + } + /* A is sparse — caller must have refreshed sm_A->csc_cache values. */ + sparse_matrix *sm_A = (sparse_matrix *) A; + BA_pd_csc_fill_values(B->X, B->n0, B->col_inv, sm_A->csc_cache, C); +} diff --git a/src/utils/matrix_sum.c b/src/utils/matrix_sum.c new file mode 100644 index 0000000..c8b12b5 --- /dev/null +++ b/src/utils/matrix_sum.c @@ -0,0 +1,38 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the SparseDiffEngine project. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include "utils/matrix_sum.h" +#include "utils/CSR_sum.h" + +void sum_matrices_alloc(matrix *A, matrix *B, matrix *C) +{ + CSR_matrix *cc = C->to_csr(C); + sum_csr_alloc(A->to_csr(A), B->to_csr(B), cc); + C->nnz = cc->nnz; +} + +void sum_matrices_fill_values(matrix *A, matrix *B, matrix *C) +{ + sum_csr_fill_values(A->to_csr(A), B->to_csr(B), C->to_csr(C)); +} + +void sum_scaled_matrices_fill_values(matrix *A, matrix *B, matrix *C, + const double *d1, const double *d2) +{ + sum_scaled_csr_matrices_fill_values(A->to_csr(A), B->to_csr(B), C->to_csr(C), + d1, d2); +} diff --git a/src/utils/mini_numpy.c b/src/utils/mini_numpy.c index baa00c5..141e77e 100644 --- a/src/utils/mini_numpy.c +++ b/src/utils/mini_numpy.c @@ -69,6 +69,17 @@ void mat_mat_mult(const double *X, const double *Y, double *Z, int m, int k, int } } +void A_transpose(double *AT, const double *A, int m, int n) +{ + for (int i = 0; i < m; i++) + { + for (int j = 0; j < n; j++) + { + AT[j * m + i] = A[i * n + j]; + } + } +} + void Y_kron_I_vec(int m, int k, int n, const double *Y, const double *w, double *v) { for (int j = 0; j < k; j++) @@ -101,7 +112,7 @@ void I_kron_XT_vec(int m, int k, int n, const double *X, const double *w, double } } -void conv_matrix_fill_sparsity(CSR_Matrix *T_csr, int m, int n) +void conv_matrix_fill_sparsity(CSR_matrix *T_csr, int m, int n) { int nnz = 0; for (int r = 0; r < T_csr->m; r++) @@ -118,7 +129,7 @@ void conv_matrix_fill_sparsity(CSR_Matrix *T_csr, int m, int n) T_csr->p[T_csr->m] = nnz; } -void conv_matrix_fill_values(CSR_Matrix *T_csr, const double *a) +void conv_matrix_fill_values(CSR_matrix *T_csr, const double *a) { for (int r = 0; r < T_csr->m; r++) { diff --git a/src/utils/permuted_dense.c b/src/utils/permuted_dense.c new file mode 100644 index 0000000..34d7466 --- /dev/null +++ b/src/utils/permuted_dense.c @@ -0,0 +1,1061 @@ +/* + * Copyright 2026 Daniel Cederberg and William Zhang + * + * This file is part of the SparseDiffEngine project. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "utils/permuted_dense.h" +#include "utils/cblas_wrapper.h" +#include "utils/iVec.h" +#include "utils/linalg_dense_sparse_matmuls.h" +#include "utils/tracked_alloc.h" +#include "utils/utils.h" +#include +#include +#include +#include + +static void permuted_dense_free(matrix *self) +{ + permuted_dense *pd = (permuted_dense *) self; + free(pd->row_perm); + free(pd->col_perm); + free(pd->col_inv); + free(pd->row_inv); + /* csr_cache->x aliases pd->X (set in permuted_dense_to_csr_alloc); NULL it + so free_CSR_matrix doesn't double-free the shared buffer. */ + if (pd->csr_cache != NULL) + { + pd->csr_cache->x = NULL; + } + free_CSR_matrix(pd->csr_cache); + free(pd->X); + free(pd->dwork); + free(pd->iwork); + free(pd); +} + +/* permuted_dense has no CSC_matrix mirror; chain-rule kernels operate on X directly. + */ +static void permuted_dense_refresh_csc_values(matrix *self) +{ + (void) self; +} + +/* Vtable adapters — each delegates to the existing permuted_dense_* kernel. 
*/ +static matrix *permuted_dense_vtable_copy_sparsity(const matrix *self) +{ + const permuted_dense *pd = (const permuted_dense *) self; + return new_permuted_dense(pd->base.m, pd->base.n, pd->m0, pd->n0, pd->row_perm, + pd->col_perm, NULL); +} + +static void permuted_dense_vtable_DA_fill_values(const double *d, const matrix *self, + matrix *out) +{ + DA_pd_fill_values(d, (const permuted_dense *) self, (permuted_dense *) out); +} + +static matrix *permuted_dense_vtable_ATA_alloc(matrix *self) +{ + return ATA_pd_alloc((const permuted_dense *) self); +} + +static void permuted_dense_vtable_ATDA_fill_values(const matrix *self, + const double *d, matrix *out) +{ + ATDA_pd_fill_values((const permuted_dense *) self, d, (permuted_dense *) out); +} + +/* Forward decls; definitions lower in the file. */ +static CSR_matrix *permuted_dense_to_csr_alloc(const permuted_dense *A); +static void permuted_dense_ensure_dwork(const permuted_dense *pd_const, size_t size); + +/* Lazy CSR_matrix view: allocate structure on first call, then return the cache. + The cache's x array aliases pd->X (see permuted_dense_to_csr_alloc), so + values are always live without a per-call refresh. */ +static CSR_matrix *permuted_dense_to_csr(matrix *self) +{ + permuted_dense *pd = (permuted_dense *) self; + if (pd->csr_cache == NULL) + { + pd->csr_cache = permuted_dense_to_csr_alloc(pd); + } + return pd->csr_cache; +} + +static matrix *permuted_dense_vtable_transpose_alloc(const matrix *self) +{ + const permuted_dense *pd = (const permuted_dense *) self; + /* Swap (m, n), (m0, n0), and (row_perm, col_perm). The constructor + asserts strict increase of both perms, which holds by construction. 
*/ + return new_permuted_dense(pd->base.n, pd->base.m, pd->n0, pd->m0, pd->col_perm, + pd->row_perm, NULL); +} + +static void permuted_dense_vtable_transpose_fill_values(const matrix *self, + matrix *out) +{ + const permuted_dense *pd_in = (const permuted_dense *) self; + permuted_dense *pd_out = (permuted_dense *) out; + int m0 = pd_in->m0; + int n0 = pd_in->n0; + /* pd_out has shape (n0, m0); transpose pd_in->X into pd_out->X. */ + for (int ii = 0; ii < m0; ii++) + { + for (int jj = 0; jj < n0; jj++) + { + pd_out->X[jj * m0 + ii] = pd_in->X[ii * n0 + jj]; + } + } +} + +static matrix *permuted_dense_vtable_index_alloc(matrix *self, const int *indices, + int n_idxs) +{ + const permuted_dense *pd = (const permuted_dense *) self; + + /* Scan indices: which output positions i hit a row in pd->row_perm? */ + int *new_row_perm = (int *) SP_MALLOC(n_idxs * sizeof(int)); + int new_m0 = 0; + for (int i = 0; i < n_idxs; i++) + { + if (pd->row_inv[indices[i]] >= 0) + { + new_row_perm[new_m0++] = i; + } + } + + matrix *out = new_permuted_dense(n_idxs, pd->base.n, new_m0, pd->n0, + new_row_perm, pd->col_perm, NULL); + free(new_row_perm); + return out; +} + +static void permuted_dense_vtable_index_fill_values(matrix *self, const int *indices, + int n_idxs, matrix *out) +{ + (void) n_idxs; + const permuted_dense *pd = (const permuted_dense *) self; + permuted_dense *out_pd = (permuted_dense *) out; + int n0 = pd->n0; + for (int k = 0; k < out_pd->m0; k++) + { + int i = out_pd->row_perm[k]; + int old_ii = pd->row_inv[indices[i]]; + memcpy(out_pd->X + k * n0, pd->X + old_ii * n0, n0 * sizeof(double)); + } +} + +static matrix *permuted_dense_vtable_promote_alloc(matrix *self, int size) +{ + const permuted_dense *pd = (const permuted_dense *) self; + assert(pd->m0 <= 1); + + if (pd->m0 == 0) + { + /* source row is all-zero; output is also structurally all-zero. 
*/ + return new_permuted_dense(size, pd->base.n, 0, pd->n0, NULL, pd->col_perm, + NULL); + } + + int *new_row_perm = (int *) SP_MALLOC(size * sizeof(int)); + for (int i = 0; i < size; i++) + { + new_row_perm[i] = i; + } + matrix *out = new_permuted_dense(size, pd->base.n, size, pd->n0, new_row_perm, + pd->col_perm, NULL); + free(new_row_perm); + return out; +} + +static void permuted_dense_vtable_promote_fill_values(matrix *self, matrix *out) +{ + const permuted_dense *pd = (const permuted_dense *) self; + permuted_dense *out_pd = (permuted_dense *) out; + if (pd->m0 == 0) return; + int n0 = pd->n0; + for (int k = 0; k < out_pd->m0; k++) + { + memcpy(out_pd->X + k * n0, pd->X, n0 * sizeof(double)); + } +} + +static matrix *permuted_dense_vtable_broadcast_alloc(matrix *self, + broadcast_type type, int d1, + int d2) +{ + const permuted_dense *pd = (const permuted_dense *) self; + int out_m = d1 * d2; + + int new_m0; + if (type == BROADCAST_SCALAR) + { + new_m0 = (pd->m0 == 0) ? 0 : out_m; + } + else if (type == BROADCAST_ROW) + { + new_m0 = d1 * pd->m0; + } + else /* BROADCAST_COL */ + { + new_m0 = d2 * pd->m0; + } + + if (new_m0 == 0) + { + return new_permuted_dense(out_m, pd->base.n, 0, pd->n0, NULL, pd->col_perm, + NULL); + } + + int *new_row_perm = (int *) SP_MALLOC(new_m0 * sizeof(int)); + int k = 0; + if (type == BROADCAST_SCALAR) + { + for (int i = 0; i < out_m; i++) + { + new_row_perm[k++] = i; + } + } + else if (type == BROADCAST_ROW) + { + for (int j_ii = 0; j_ii < pd->m0; j_ii++) + { + int j_old = pd->row_perm[j_ii]; + for (int i = 0; i < d1; i++) + { + new_row_perm[k++] = j_old * d1 + i; + } + } + } + else /* BROADCAST_COL */ + { + for (int j = 0; j < d2; j++) + { + for (int ii_old = 0; ii_old < pd->m0; ii_old++) + { + new_row_perm[k++] = j * d1 + pd->row_perm[ii_old]; + } + } + } + + matrix *out = new_permuted_dense(out_m, pd->base.n, new_m0, pd->n0, new_row_perm, + pd->col_perm, NULL); + free(new_row_perm); + return out; +} + +static void 
permuted_dense_vtable_broadcast_fill_values(matrix *self, + broadcast_type type, int d1, + int d2, matrix *out) +{ + const permuted_dense *pd = (const permuted_dense *) self; + permuted_dense *out_pd = (permuted_dense *) out; + if (pd->m0 == 0) + { + return; + } + int n0 = pd->n0; + + if (type == BROADCAST_SCALAR) + { + for (int k = 0; k < out_pd->m0; k++) + { + memcpy(out_pd->X + k * n0, pd->X, n0 * sizeof(double)); + } + } + else if (type == BROADCAST_ROW) + { + /* output row k corresponds to child dense row (k / d1). */ + (void) d2; + for (int k = 0; k < out_pd->m0; k++) + { + memcpy(out_pd->X + k * n0, pd->X + (k / d1) * n0, n0 * sizeof(double)); + } + } + else /* BROADCAST_COL */ + { + (void) d1; + size_t child_block = pd->m0 * n0; + for (int j = 0; j < d2; j++) + { + memcpy(out_pd->X + j * child_block, pd->X, child_block * sizeof(double)); + } + } +} + +static matrix *permuted_dense_vtable_diag_vec_alloc(matrix *self) +{ + const permuted_dense *pd = (const permuted_dense *) self; + int n = pd->base.m; + int out_m = n * n; + + if (pd->m0 == 0) + { + return new_permuted_dense(out_m, pd->base.n, 0, pd->n0, NULL, pd->col_perm, + NULL); + } + + int *new_row_perm = (int *) SP_MALLOC(pd->m0 * sizeof(int)); + for (int ii = 0; ii < pd->m0; ii++) + { + new_row_perm[ii] = pd->row_perm[ii] * (n + 1); + } + matrix *out = new_permuted_dense(out_m, pd->base.n, pd->m0, pd->n0, new_row_perm, + pd->col_perm, NULL); + free(new_row_perm); + return out; +} + +static void permuted_dense_vtable_diag_vec_fill_values(matrix *self, matrix *out) +{ + const permuted_dense *pd = (const permuted_dense *) self; + permuted_dense *out_pd = (permuted_dense *) out; + if (pd->m0 == 0) + { + return; + } + memcpy(out_pd->X, pd->X, pd->m0 * pd->n0 * sizeof(double)); +} + +/* ===== Operator-role adapters: PD acting as the constant left operand of + left_matmul. Currently restricted to full-block PDs (m0 == m, n0 == n, + identity perms) — the only operator shape any caller needs today. 
*/ + +static void permuted_dense_vtable_block_left_mult_vec(const matrix *A, + const double *x, double *y, + int p) +{ + /* Full-block precondition: A->x is a single contiguous row-major m x n + block (perms are identity). For a non-trivial PD, A->x still points + at pd->X but X only stores the values at the permuted positions; the + layout below assumes a full m x n matrix, hence the assert. */ + assert(((const permuted_dense *) A)->m0 == A->m && + ((const permuted_dense *) A)->n0 == A->n); + + /* y = kron(I_p, A) @ x via a single dgemm. + Input x is p blocks of length n (block-interleaved); output y is p + blocks of length m. That's identical in memory to row-major matrices + of shape (p, n) and (p, m) respectively, so we can compute + y (p x m) = x (p x n) * A^T (n x m) + in one shot. CblasRowMajor + CblasNoTrans on x + CblasTrans on A + gives exactly that. */ + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasTrans, p, A->m, A->n, 1.0, x, A->n, + A->x, A->n, 0.0, y, A->m); +} + +static CSC_matrix * +permuted_dense_vtable_block_left_mult_sparsity(const matrix *A, const CSC_matrix *J, + int p) +{ + const permuted_dense *pd = (const permuted_dense *) A; + assert(pd->m0 == A->m && pd->n0 == A->n); + /* Pre-size dwork for the subsequent block_left_mult_values fill, which + densifies a sparse column of J (size A->n) before applying A. Honors + the no-alloc-in-fill rule. */ + permuted_dense_ensure_dwork(pd, (size_t) A->n); + return I_kron_A_alloc(A, J, p); +} + +static void permuted_dense_vtable_block_left_mult_values(const matrix *A, + const CSC_matrix *J, + CSC_matrix *C) +{ + const permuted_dense *pd = (const permuted_dense *) A; + assert(pd->m0 == A->m && pd->n0 == A->n); + I_kron_A_fill_values(A, J, C, pd->dwork); +} + +matrix *new_permuted_dense(int m, int n, int m0, int n0, const int *row_perm, + const int *col_perm, const double *X_data) +{ + /* Validate sorted invariants. 
*/ + for (int ii = 1; ii < m0; ii++) + { + assert(row_perm[ii] > row_perm[ii - 1]); + } + for (int jj = 1; jj < n0; jj++) + { + assert(col_perm[jj] > col_perm[jj - 1]); + } + if (m0 > 0) + { + assert(row_perm[0] >= 0 && row_perm[m0 - 1] < m); + } + if (n0 > 0) + { + assert(col_perm[0] >= 0 && col_perm[n0 - 1] < n); + } + + permuted_dense *pd = (permuted_dense *) SP_CALLOC(1, sizeof(permuted_dense)); + pd->base.m = m; + pd->base.n = n; + pd->base.nnz = m0 * n0; + pd->base.block_left_mult_vec = permuted_dense_vtable_block_left_mult_vec; + pd->base.block_left_mult_sparsity = + permuted_dense_vtable_block_left_mult_sparsity; + pd->base.block_left_mult_values = permuted_dense_vtable_block_left_mult_values; + pd->base.copy_sparsity = permuted_dense_vtable_copy_sparsity; + pd->base.DA_fill_values = permuted_dense_vtable_DA_fill_values; + pd->base.ATA_alloc = permuted_dense_vtable_ATA_alloc; + pd->base.ATDA_fill_values = permuted_dense_vtable_ATDA_fill_values; + pd->base.to_csr = permuted_dense_to_csr; + pd->base.transpose_alloc = permuted_dense_vtable_transpose_alloc; + pd->base.transpose_fill_values = permuted_dense_vtable_transpose_fill_values; + pd->base.is_permuted_dense = true; + pd->base.index_alloc = permuted_dense_vtable_index_alloc; + pd->base.index_fill_values = permuted_dense_vtable_index_fill_values; + pd->base.promote_alloc = permuted_dense_vtable_promote_alloc; + pd->base.promote_fill_values = permuted_dense_vtable_promote_fill_values; + pd->base.broadcast_alloc = permuted_dense_vtable_broadcast_alloc; + pd->base.broadcast_fill_values = permuted_dense_vtable_broadcast_fill_values; + pd->base.diag_vec_alloc = permuted_dense_vtable_diag_vec_alloc; + pd->base.diag_vec_fill_values = permuted_dense_vtable_diag_vec_fill_values; + pd->base.refresh_csc_values = permuted_dense_refresh_csc_values; + pd->base.free_fn = permuted_dense_free; + + pd->m0 = m0; + pd->n0 = n0; + + int sz = m0 * n0; + pd->row_perm = (int *) SP_MALLOC(m0 * sizeof(int)); + pd->col_perm = (int 
*) SP_MALLOC(n0 * sizeof(int)); + pd->X = (double *) SP_MALLOC(sz * sizeof(double)); + pd->base.x = pd->X; + /* dwork is allocated lazily by kernels via permuted_dense_ensure_dwork. + SP_CALLOC above already zeroed dwork / dwork_size, but make it + explicit. */ + pd->dwork = NULL; + pd->dwork_size = 0; + pd->col_inv = (int *) SP_MALLOC(n * sizeof(int)); + pd->row_inv = (int *) SP_MALLOC(m * sizeof(int)); + + if (m0 > 0) + { + memcpy(pd->row_perm, row_perm, m0 * sizeof(int)); + } + if (n0 > 0) + { + memcpy(pd->col_perm, col_perm, n0 * sizeof(int)); + } + + for (int j = 0; j < n; j++) + { + pd->col_inv[j] = -1; + } + for (int jj = 0; jj < n0; jj++) + { + pd->col_inv[col_perm[jj]] = jj; + } + + for (int i = 0; i < m; i++) + { + pd->row_inv[i] = -1; + } + for (int ii = 0; ii < m0; ii++) + { + pd->row_inv[row_perm[ii]] = ii; + } + + if (X_data != NULL && sz > 0) + { + memcpy(pd->X, X_data, sz * sizeof(double)); + } + + return &pd->base; +} + +matrix *new_permuted_dense_full(int m, int n, const double *data) +{ + int *row_perm = (int *) SP_MALLOC(m * sizeof(int)); + int *col_perm = (int *) SP_MALLOC(n * sizeof(int)); + for (int i = 0; i < m; i++) row_perm[i] = i; + for (int j = 0; j < n; j++) col_perm[j] = j; + matrix *out = new_permuted_dense(m, n, m, n, row_perm, col_perm, data); + free(row_perm); + free(col_perm); + return out; +} + +static CSR_matrix *permuted_dense_to_csr_alloc(const permuted_dense *A) +{ + int m0 = A->m0; + int n0 = A->n0; + int m = A->base.m; + CSR_matrix *C = new_CSR_matrix(m, A->base.n, m0 * n0); + + /* Alias C->x to A->X: the dense block layout already matches what the + CSR_matrix view's value array would hold, so values are always live with no + memcpy needed. The PD owns the buffer; permuted_dense_free nulls + C->x before free_CSR_matrix to avoid double-free. 
*/ + free(C->x); + C->x = A->X; + + /* fill column indices (each dense row contributes a copy of col_perm) */ + for (int ii = 0; ii < m0; ii++) + { + memcpy(C->i + ii * n0, A->col_perm, n0 * sizeof(int)); + } + + /* set row pointers via count and then cumulative sum */ + memset(C->p, 0, (m + 1) * sizeof(int)); + for (int ii = 0; ii < m0; ii++) + { + C->p[A->row_perm[ii] + 1] = n0; + } + + for (int i = 0; i < m; i++) + { + C->p[i + 1] += C->p[i]; + } + + return C; +} + +void DA_pd_fill_values(const double *d, const permuted_dense *A, permuted_dense *C) +{ + int m0 = A->m0; + int n0 = A->n0; + cblas_dcopy(m0 * n0, A->X, 1, C->X, 1); + for (int ii = 0; ii < m0; ii++) + { + cblas_dscal(n0, d[A->row_perm[ii]], C->X + ii * n0, 1); + } +} + +/* Ensure pd->dwork is sized at least `size` doubles. Grows in place; + contents are NOT preserved. Called from allocator functions so that the + corresponding fill kernels never need to allocate. Takes a const pointer + and casts internally — this matches the dwork contract (header) that + dwork is mutable through a const permuted_dense *. */ +static void permuted_dense_ensure_dwork(const permuted_dense *pd_const, size_t size) +{ + permuted_dense *pd = (permuted_dense *) pd_const; + if (pd->dwork_size >= size) return; + free(pd->dwork); + pd->dwork = (double *) SP_MALLOC(size * sizeof(double)); + pd->dwork_size = size; +} + +matrix *ATA_pd_alloc(const permuted_dense *A) +{ + int n = A->base.n; + /* C = AT @ A has a dense block of size n0 x n0, with row and column index + sets given by A's col_perm. (This follows from Cij = ai^T aj where + ai and aj are columns of A. Here, ai and aj always have overlapping entries, + so Cij != 0 for (i, j) in A->col_perm x A->col_perm) */ + + /* Pre-size A's dwork for the ATDA fill (Y-buffer = diag(d_perm) X). 
*/ + permuted_dense_ensure_dwork(A, (size_t) A->m0 * A->n0); + + return new_permuted_dense(n, n, A->n0, A->n0, A->col_perm, A->col_perm, NULL); +} + +void ATDA_pd_fill_values(const permuted_dense *A, const double *d, permuted_dense *C) +{ + int m0 = A->m0; + int n0 = A->n0; + + /* dwork = diag(d_perm) @ X, where d_perm[ii] = d[row_perm[ii]]. */ + cblas_dcopy(m0 * n0, A->X, 1, A->dwork, 1); + for (int ii = 0; ii < m0; ii++) + { + cblas_dscal(n0, d[A->row_perm[ii]], A->dwork + ii * n0, 1); + } + + /* C = XT @ dwork = XT @ diag(d_perm) @ X */ + cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, n0, n0, m0, 1.0, A->X, n0, + A->dwork, n0, 0.0, C->X, n0); +} + +matrix *BTA_pd_pd_alloc(const permuted_dense *B, const permuted_dense *A) +{ + /* if A and B have no overlapping rows, then C = BT @ A is empty */ + if (!has_overlap(A->row_perm, A->m0, B->row_perm, B->m0, 0)) + { + return new_permuted_dense(B->base.n, A->base.n, 0, 0, NULL, NULL, NULL); + } + + /* otherwise C has a dense block of size B->n0 x A->n0, with row and column + index sets given by B->col_perm and A->col_perm, respectively */ + matrix *C = new_permuted_dense(B->base.n, A->base.n, B->n0, A->n0, B->col_perm, + A->col_perm, NULL); + + /* Pre-size A's and B's dwork for the BTA fill slow path (gathered row + buffers). Each operand needs s_max rows of its own n0 doubles, where + s_max = MIN(A->m0, B->m0) bounds the intersection of row_perms. */ + int s_max = MIN(A->m0, B->m0); + permuted_dense_ensure_dwork(A, (size_t) s_max * A->n0); + permuted_dense_ensure_dwork(B, (size_t) s_max * B->n0); + + /* Pre-allocate C->iwork for idx_A + idx_B in BTA / BTDA_pd_pd slow paths + (each needs at most s_max ints; we store both arrays back-to-back + in iwork, hence 2 * s_max). */ + permuted_dense *C_pd = (permuted_dense *) C; + C_pd->iwork_size = (size_t) 2 * s_max; + C_pd->iwork = (int *) SP_MALLOC(C_pd->iwork_size * sizeof(int)); + + return C; +} + +/* Return 1 iff arrays a and b of length n are element-wise equal. 
*/ +static int int_arrays_equal(const int *a, const int *b, int n) +{ + for (int i = 0; i < n; i++) + { + if (a[i] != b[i]) return 0; + } + return 1; +} + +/* Find intersection of two sorted, ascending int arrays. For each pair of positions + (ii, jj) where a[ii] == b[jj], write ii into idx_a and jj into idx_b. Returns the + count of matches. Buffers idx_a and idx_b must have capacity >= min(a_len, b_len); + no allocation is performed. */ +static inline int sorted_intersect_indices(const int *a, int a_len, const int *b, + int b_len, int *idx_a, int *idx_b) +{ + int s = 0; + int ii = 0, jj = 0; + while (ii < a_len && jj < b_len) + { + int ra = a[ii]; + int rb = b[jj]; + if (ra == rb) + { + idx_a[s] = ii; + idx_b[s] = jj; + s++; + ii++; + jj++; + } + else if (ra < rb) + { + ii++; + } + else + { + jj++; + } + } + return s; +} + +void BTA_pd_pd_fill_values(const permuted_dense *B, const permuted_dense *A, + permuted_dense *C) +{ + /* C may be empty if there is no overlap in row permutations */ + if (C->base.nnz == 0) + { + return; + } + + /* if B and A have identical row_perms, one matmul suffices */ + if (A->m0 == B->m0 && int_arrays_equal(A->row_perm, B->row_perm, A->m0)) + { + cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, B->n0, A->n0, A->m0, + 1.0, B->X, B->n0, A->X, A->n0, 0.0, C->X, A->n0); + return; + } + + // ----------------------------------------------------------------------- + // find intersection of row permutations. C->iwork was pre-sized by + // BTA_pd_pd_alloc to 2 * MIN(A->m0, B->m0) ints (idx_A | idx_B back- + // to-back), so no allocation here. 
+ // ----------------------------------------------------------------------- + int s_max = MIN(A->m0, B->m0); + int *idx_A = C->iwork; + int *idx_B = C->iwork + s_max; + int s = sorted_intersect_indices(A->row_perm, A->m0, B->row_perm, B->m0, idx_A, + idx_B); + assert(s > 0); + + // ------------------------------------------------------------------------ + // Gather the matching rows into A->dwork and B->dwork. dwork is pre-sized + // by BTA_pd_pd_alloc (one ensure_dwork call per operand at alloc time). + // ------------------------------------------------------------------------ + for (int k = 0; k < s; k++) + { + memcpy(A->dwork + k * A->n0, A->X + idx_A[k] * A->n0, + A->n0 * sizeof(double)); + memcpy(B->dwork + k * B->n0, B->X + idx_B[k] * B->n0, + B->n0 * sizeof(double)); + } + + /* matmul on the gathered rows */ + cblas_dgemm(CblasRowMajor, CblasTrans, CblasNoTrans, B->n0, A->n0, s, 1.0, + B->dwork, B->n0, A->dwork, A->n0, 0.0, C->X, A->n0); +} + +void BTDA_pd_pd_fill_values(const permuted_dense *B, const double *d, + const permuted_dense *A, permuted_dense *C) +{ + /* C may be empty if there is no overlap in row permutations of A and B */ + if (C->base.nnz == 0) + { + return; + } + + /* TODO: must remove this allocation. Very important. The DA + intermediate PD is allocated and freed on every Hessian iteration + — violates the no-alloc-in-fill policy. Fix is to fold diag(d) + directly into BTA_pd_pd_fill_values's gather/dgemm (either via a + shared internal helper that takes an optional d, or by rewriting + this kernel inline using pre-sized A->dwork). 
*/ + /* C = BT @ (DA) */ + permuted_dense *DA = (permuted_dense *) A->base.copy_sparsity(&A->base); + DA_pd_fill_values(d, A, DA); + BTA_pd_pd_fill_values(B, DA, C); + free_matrix(&DA->base); +} + +/* The CSR-flavored kernels for (B=Sparse, A=PD) live in src/old-code; the + production path uses BTA_csc_pd_alloc / BTDA_csc_pd_fill_values defined + further below, which delegate to BTA_pd_csc via the (A^T B)^T identity. */ + +/* Return true if any of the 'len' integers in 'indices' exist in the set + marked by 'inv' (inv[k] != -1 iff k is in the set). */ +static inline bool idxs_hits_set(const int *idxs, int len, const int *inv) +{ + for (int ii = 0; ii < len; ii++) + { + if (inv[idxs[ii]] != -1) + { + return true; + } + } + return false; +} + +/* Inner product of a sparse vector (vals[0..len) at positions idxs[0..len)) + with a dense vector, where inv maps each idxs value to a position in + 'dense' (inv[k] == -1 means skip that entry). */ +static inline double sparse_dot_dense(const double *vals, const int *idxs, int len, + const int *inv, const double *dense) +{ + double sum = 0.0; + for (int e = 0; e < len; e++) + { + int kk = inv[idxs[e]]; + if (kk == -1) + { + continue; + } + sum += vals[e] * dense[kk]; + } + return sum; +} + +matrix *BA_pd_csc_alloc(const permuted_dense *B, const CSC_matrix *A) +{ + /* Cij != 0 if row i of B overlaps with column j of A. So we loop through + the columns of A. For each column of A, we check if it has any nonzeros in + rows that are in B's col_perm. 
If yes, column j of C will have a nonzero + block corresponding to the rows of B */ + iVec *col_perm_C = iVec_new(10); + for (int j = 0; j < A->n; j++) + { + int start = A->p[j]; + int len = A->p[j + 1] - start; + if (idxs_hits_set(A->i + start, len, B->col_inv)) + { + iVec_append(col_perm_C, j); + } + } + + matrix *C = new_permuted_dense(B->base.m, A->n, B->m0, col_perm_C->len, + B->row_perm, col_perm_C->data, NULL); + iVec_free(col_perm_C); + return C; +} + +void BA_pd_csc_fill_values(const double *B, int n0_B, const int *inv, + const CSC_matrix *A, permuted_dense *C) +{ + /* C[i, j] = bi^T @ ajj, where bi is the ith row of B_X (length n0_B, + row stride n0_B) and ajj is the jjth column of A's sparse block + (column jj = C->col_perm[j]). inv maps A's row indices to positions + in B_X (entries with inv[r] == -1 are skipped). */ + + /* row i of C */ + for (int i = 0; i < C->m0; i++) + { + double *ci = C->X + i * C->n0; + + /* col j of C */ + for (int j = 0; j < C->n0; j++) + { + + int jj = C->col_perm[j]; + int start = A->p[jj]; + int len = A->p[jj + 1] - start; + /* we compute entry C[i, j] */ + ci[j] = + sparse_dot_dense(A->x + start, A->i + start, len, inv, B + i * n0_B); + } + } +} + +matrix *BA_pd_pd_alloc(const permuted_dense *B, const permuted_dense *A) +{ + /* if B's columns don't overlap with A's rows, C = B @ A is empty */ + if (!has_overlap(B->col_perm, B->n0, A->row_perm, A->m0, 0)) + { + return new_permuted_dense(B->base.m, A->base.n, 0, 0, NULL, NULL, NULL); + } + + /* otherwise C has a dense block of size B->m0 x A->n0, with row index + set B->row_perm and column index set A->col_perm. */ + matrix *C = new_permuted_dense(B->base.m, A->base.n, B->m0, A->n0, B->row_perm, + A->col_perm, NULL); + + int s_max = MIN(B->n0, A->m0); + + /* Pre-size B's and A's dwork for the gathers in fill. Worst-case + intersection size is s_max; B_sub is (m0, s) and A_sub is (s, n0). 
*/ + permuted_dense_ensure_dwork(A, (size_t) s_max * A->n0); + permuted_dense_ensure_dwork(B, (size_t) s_max * B->m0); + + /* Pre-allocate C->iwork for idx_B + idx_A back-to-back (2 * s_max ints), + same idiom as BTA_pd_pd_alloc. */ + permuted_dense *C_pd = (permuted_dense *) C; + C_pd->iwork_size = (size_t) 2 * s_max; + C_pd->iwork = (int *) SP_MALLOC(C_pd->iwork_size * sizeof(int)); + + return C; +} + +/* TODO: do we want to reuse BTA_pd_pd_fill_values? */ +void BA_pd_pd_fill_values(const permuted_dense *B, const permuted_dense *A, + permuted_dense *C) +{ + /* C may be empty when B->col_perm and A->row_perm don't overlap. */ + if (C->base.nnz == 0) + { + return; + } + + /* if B's col_perm and A's row_perm are identical, one matmul suffices */ + if (B->n0 == A->m0 && int_arrays_equal(B->col_perm, A->row_perm, B->n0)) + { + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, B->m0, A->n0, B->n0, + 1.0, B->X, B->n0, A->X, A->n0, 0.0, C->X, A->n0); + return; + } + + // ----------------------------------------------------------------------- + // find intersection of B's col_perm and A's row_perm. C->iwork was + // pre-sized by BA_pd_pd_alloc to 2 * MIN(B->n0, A->m0) ints (idx_B | + // idx_A back-to-back), so no allocation here. + // ----------------------------------------------------------------------- + int s_max = MIN(B->n0, A->m0); + int *idx_B = C->iwork; + int *idx_A = C->iwork + s_max; + int s = sorted_intersect_indices(B->col_perm, B->n0, A->row_perm, A->m0, idx_B, + idx_A); + assert(s > 0); + + // ------------------------------------------------------------------------ + // Gather the matching slices into B->dwork (column gather) and A->dwork + // (row gather). dwork is pre-sized by BA_pd_pd_alloc (one ensure_dwork + // call per operand at alloc time). + // ------------------------------------------------------------------------ + /* B_sub shape (B->m0, s) row-major: B_sub[ii, kk] = B->X[ii, idx_B[kk]]. 
*/ + for (int ii = 0; ii < B->m0; ii++) + { + for (int kk = 0; kk < s; kk++) + { + B->dwork[ii * s + kk] = B->X[ii * B->n0 + idx_B[kk]]; + } + } + /* A_sub shape (s, A->n0) row-major: A_sub[kk, :] = A->X[idx_A[kk], :]. */ + for (int kk = 0; kk < s; kk++) + { + memcpy(A->dwork + kk * A->n0, A->X + idx_A[kk] * A->n0, + A->n0 * sizeof(double)); + } + + /* matmul on the gathered slices */ + cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, B->m0, A->n0, s, 1.0, + B->dwork, s, A->dwork, A->n0, 0.0, C->X, A->n0); +} + +matrix *BTA_pd_csc_alloc(const permuted_dense *B, const CSC_matrix *A) +{ + /* Cij != 0 if column i of B overlaps with column j of A. So we loop + through the columns of A. For each column of A, we check if it has any + nonzeros in rows that are in B's row_perm. If yes, column j of C will + have a nonzero block corresponding to the columns of B */ + iVec *col_active = iVec_new(8); + for (int j = 0; j < A->n; j++) + { + int start = A->p[j]; + int len = A->p[j + 1] - start; + if (idxs_hits_set(A->i + start, len, B->row_inv)) + { + iVec_append(col_active, j); + } + } + + matrix *C = new_permuted_dense(B->base.n, A->n, B->n0, col_active->len, + B->col_perm, col_active->data, NULL); + iVec_free(col_active); + + /* Pre-size B's dwork for the BTDA fill (holds (diag(d) B)^T). 
*/ + permuted_dense_ensure_dwork(B, (size_t) B->m0 * B->n0); + + return C; +} + +/* C = B^T diag(d) A = (diag(d) B)^T A */ +void BTDA_pd_csc_fill_values(const permuted_dense *B, const double *d, + const CSC_matrix *A, permuted_dense *C) +{ + /* C may be empty */ + if (C->base.nnz == 0) + { + return; + } + + int m0 = B->m0; + int n0 = B->n0; + + /* compute B->dwork = (diag(d) B)^T */ + for (int kk = 0; kk < m0; kk++) + { + double dk = d[B->row_perm[kk]]; + for (int ii = 0; ii < n0; ii++) + { + B->dwork[ii * m0 + kk] = dk * B->X[kk * n0 + ii]; + } + } + + BA_pd_csc_fill_values(B->dwork, m0, B->row_inv, A, C); +} + +matrix *BTA_csc_pd_alloc(const CSC_matrix *B, const permuted_dense *A) +{ + /* Cij != 0 if column i of B overlaps with row j of A. So we loop through the + columns of B. For each column of B, we check if it has any nonzeros in rows + that are in A->row_perm. If yes, column i of C will have a nonzero block + corresponding to the columns of A */ + + iVec *row_active = iVec_new(10); + for (int i = 0; i < B->n; i++) + { + int start = B->p[i]; + int len = B->p[i + 1] - start; + if (idxs_hits_set(B->i + start, len, A->row_inv)) + { + iVec_append(row_active, i); + } + } + + matrix *C = new_permuted_dense(B->n, A->base.n, row_active->len, A->n0, + row_active->data, A->col_perm, NULL); + iVec_free(row_active); + + /* Pre-size A's dwork for the BTDA fill (holds (diag(d_perm) X_A)^T). 
*/ + for (int i_C = 0; i_C < C->m0; i_C++) + { + int B_col = C->row_perm[i_C]; + int start = B->p[B_col]; + int len = B->p[B_col + 1] - start; + double *ci = C->X + i_C * C->n0; + for (int j_C = 0; j_C < C->n0; j_C++) + { + ci[j_C] = sparse_dot_dense(B->x + start, B->i + start, len, inv, + A_T + j_C * m0_A); + } + } +} + +/* C = B^T diag(d) A. Folds diag(d) into A's dense block (writing + (diag(d_perm) X_A)^T into A->dwork) and delegates to BTA_csc_pd_fill_values. + Mirrors how BTDA_pd_csc_fill_values wraps BA_pd_csc_fill_values. */ +void BTDA_csc_pd_fill_values(const CSC_matrix *B, const double *d, + const permuted_dense *A, permuted_dense *C) +{ + if (C->base.nnz == 0) + { + return; + } + + int m0_A = A->m0; + int n0_A = A->n0; + + /* A->dwork = (diag(d_perm) X_A)^T, row-major shape (n0_A, m0_A). + Pre-sized by BTA_csc_pd_alloc; no allocation in fill. + Column j of (diag(d) X_A) lives contiguously in dwork as row j — + which is exactly the layout BTA_csc_pd_fill_values wants. */ + for (int kk = 0; kk < m0_A; kk++) + { + double dk = d[A->row_perm[kk]]; + for (int jj = 0; jj < n0_A; jj++) + { + A->dwork[jj * m0_A + kk] = dk * A->X[kk * n0_A + jj]; + } + } + + BTA_csc_pd_fill_values(B, A->dwork, m0_A, A->row_inv, C); +} + +/* Original transpose-via-Cprime implementation of BTDA_csc_pd_fill_values. + No longer linked; preserved here as in-file reference for the math + identity C = (A^T diag(d) B)^T and the BA_pd_csc_fill_values delegation. */ +#if defined(__GNUC__) || defined(__clang__) +__attribute__((unused)) +#endif +static void +BTDA_csc_pd_fill_values_via_transpose_dead(const CSC_matrix *B, const double *d, + const permuted_dense *A, + permuted_dense *C) +{ + if (C->base.nnz == 0) + { + return; + } + + /* Cprime has shape (A->n0, |row_active|) — i.e. C transposed. */ + matrix *Cprime_m = BTA_pd_csc_alloc(A, B); + permuted_dense *Cprime = (permuted_dense *) Cprime_m; + BTDA_pd_csc_fill_values(A, d, B, Cprime); + + /* C->X = Cprime->X^T. 
Cprime has dims (C->n0, C->m0). */ + int m0 = C->m0; + int n0 = C->n0; + for (int i = 0; i < m0; i++) + { + for (int j = 0; j < n0; j++) + { + C->X[i * n0 + j] = Cprime->X[j * m0 + i]; + } + } + + free_matrix(Cprime_m); +} diff --git a/src/utils/sparse_matrix.c b/src/utils/sparse_matrix.c index 0237346..211ac31 100644 --- a/src/utils/sparse_matrix.c +++ b/src/utils/sparse_matrix.c @@ -15,71 +15,350 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#include "utils/sparse_matrix.h" + +#include "utils/CSC_matrix.h" #include "utils/linalg_sparse_matmuls.h" #include "utils/matrix.h" +#include "utils/mini_numpy.h" #include "utils/tracked_alloc.h" #include #include -static void sparse_block_left_mult_vec(const Matrix *self, const double *x, +static void sparse_block_left_mult_vec(const matrix *self, const double *x, double *y, int p) { - const Sparse_Matrix *sm = (const Sparse_Matrix *) self; + const sparse_matrix *sm = (const sparse_matrix *) self; block_left_multiply_vec(sm->csr, x, y, p); } -static CSC_Matrix *sparse_block_left_mult_sparsity(const Matrix *self, - const CSC_Matrix *J, int p) +static CSC_matrix *sparse_block_left_mult_sparsity(const matrix *self, + const CSC_matrix *J, int p) { - const Sparse_Matrix *sm = (const Sparse_Matrix *) self; + const sparse_matrix *sm = (const sparse_matrix *) self; return block_left_multiply_fill_sparsity(sm->csr, J, p); } -static void sparse_block_left_mult_values(const Matrix *self, const CSC_Matrix *J, - CSC_Matrix *C) +static void sparse_block_left_mult_values(const matrix *self, const CSC_matrix *J, + CSC_matrix *C) { - const Sparse_Matrix *sm = (const Sparse_Matrix *) self; + const sparse_matrix *sm = (const sparse_matrix *) self; block_left_multiply_fill_values(sm->csr, J, C); } -static void sparse_update_values(Matrix *self, const double *new_values) +static void sparse_free(matrix *self) { - Sparse_Matrix *sm = (Sparse_Matrix *) self; - memcpy(sm->csr->x, 
new_values, sm->csr->nnz * sizeof(double)); + sparse_matrix *sm = (sparse_matrix *) self; + free_CSR_matrix(sm->csr); + free_CSC_matrix(sm->csc_cache); + free(sm->csc_iwork); + free(sm->transpose_iwork); + free(sm); } -static void sparse_free(Matrix *self) +/* Forward decl: ctor is referenced by copy_sparsity below. */ +matrix *new_sparse_matrix(CSR_matrix *A); + +/* Build the CSC_matrix cache structure if absent. Values are NOT filled here; caller + must call refresh_csc_values before consuming. ATA_alloc only needs structure, + so it's safe to call without a subsequent refresh. */ +void sparse_matrix_ensure_csc_cache(sparse_matrix *sm) { - Sparse_Matrix *sm = (Sparse_Matrix *) self; - free_csr_matrix(sm->csr); - free(sm); + if (sm->csc_cache != NULL) return; + sm->csc_iwork = (int *) SP_MALLOC(sm->csr->n * sizeof(int)); + sm->csc_cache = csr_to_csc_alloc(sm->csr, sm->csc_iwork); } -Matrix *new_sparse_matrix(const CSR_Matrix *A) +static matrix *sparse_copy_sparsity(const matrix *self) +{ + const sparse_matrix *sm = (const sparse_matrix *) self; + return new_sparse_matrix(new_csr_copy_sparsity(sm->csr)); +} + +static void sparse_DA_fill_values(const double *d, const matrix *self, matrix *out) +{ + const sparse_matrix *sm = (const sparse_matrix *) self; + sparse_matrix *sm_out = (sparse_matrix *) out; + DA_fill_values(d, sm->csr, sm_out->csr); +} + +static matrix *sparse_ATA_alloc(matrix *self) +{ + sparse_matrix *sm = (sparse_matrix *) self; + sparse_matrix_ensure_csc_cache(sm); + return new_sparse_matrix(ATA_alloc(sm->csc_cache)); +} + +/* Caller must have called refresh_csc_values since the last change to csr->x. 
*/ +static void sparse_ATDA_fill_values(const matrix *self, const double *d, matrix *out) +{ + const sparse_matrix *sm = (const sparse_matrix *) self; + sparse_matrix *sm_out = (sparse_matrix *) out; + ATDA_fill_values(sm->csc_cache, d, sm_out->csr); +} + +static CSR_matrix *sparse_to_csr(matrix *self) +{ + return ((sparse_matrix *) self)->csr; +} + +static matrix *sparse_transpose_alloc(const matrix *self) +{ + const sparse_matrix *sm = (const sparse_matrix *) self; + int *iwork = (int *) SP_MALLOC(sm->csr->n * sizeof(int)); + CSR_matrix *AT = AT_alloc(sm->csr, iwork); + sparse_matrix *out = (sparse_matrix *) new_sparse_matrix(AT); + out->transpose_iwork = iwork; + return &out->base; +} + +static void sparse_transpose_fill_values(const matrix *self, matrix *out) +{ + const sparse_matrix *sm_in = (const sparse_matrix *) self; + sparse_matrix *sm_out = (sparse_matrix *) out; + AT_fill_values(sm_in->csr, sm_out->csr, sm_out->transpose_iwork); +} + +static matrix *sparse_index_alloc(matrix *self, const int *indices, int n_idxs) +{ + CSR_matrix *Jx = ((sparse_matrix *) self)->csr; + CSR_matrix *J = new_CSR_matrix(n_idxs, self->n, Jx->nnz); + + J->p[0] = 0; + for (int i = 0; i < n_idxs; i++) + { + int row = indices[i]; + int len = Jx->p[row + 1] - Jx->p[row]; + memcpy(J->i + J->p[i], Jx->i + Jx->p[row], len * sizeof(int)); + J->p[i + 1] = J->p[i] + len; + } + J->nnz = J->p[n_idxs]; + return new_sparse_matrix(J); +} + +static void sparse_index_fill_values(matrix *self, const int *indices, int n_idxs, + matrix *out) +{ + CSR_matrix *Jx = ((sparse_matrix *) self)->csr; + CSR_matrix *J = ((sparse_matrix *) out)->csr; + for (int i = 0; i < n_idxs; i++) + { + int len = J->p[i + 1] - J->p[i]; + memcpy(J->x + J->p[i], Jx->x + Jx->p[indices[i]], len * sizeof(double)); + } +} + +static matrix *sparse_promote_alloc(matrix *self, int size) +{ + CSR_matrix *Jx = ((sparse_matrix *) self)->csr; + int row_nnz = Jx->nnz; + CSR_matrix *J = new_CSR_matrix(size, self->n, size * row_nnz); + 
+ for (int row = 0; row < size; row++) + { + J->p[row] = row * row_nnz; + memcpy(J->i + row * row_nnz, Jx->i, row_nnz * sizeof(int)); + } + J->p[size] = size * row_nnz; + J->nnz = size * row_nnz; + return new_sparse_matrix(J); +} + +static void sparse_promote_fill_values(matrix *self, matrix *out) +{ + CSR_matrix *Jx = ((sparse_matrix *) self)->csr; + int row_nnz = Jx->nnz; + for (int row = 0; row < out->m; row++) + { + memcpy(out->x + row * row_nnz, Jx->x, row_nnz * sizeof(double)); + } +} + +static matrix *sparse_broadcast_alloc(matrix *self, broadcast_type type, int d1, + int d2) +{ + CSR_matrix *Jx = ((sparse_matrix *) self)->csr; + int out_m = d1 * d2; + int total_nnz; + if (type == BROADCAST_ROW) + { + total_nnz = Jx->nnz * d1; + } + else if (type == BROADCAST_COL) + { + total_nnz = Jx->nnz * d2; + } + else /* BROADCAST_SCALAR */ + { + total_nnz = Jx->nnz * out_m; + } + + CSR_matrix *J = new_CSR_matrix(out_m, self->n, total_nnz); + + if (type == BROADCAST_ROW) + { + int acc = 0; + for (int i = 0; i < d2; i++) + { + int nnz_in_row = Jx->p[i + 1] - Jx->p[i]; + tile_int(J->i + acc, Jx->i + Jx->p[i], nnz_in_row, d1); + for (int rep = 0; rep < d1; rep++) + { + J->p[i * d1 + rep] = acc; + acc += nnz_in_row; + } + } + J->p[out_m] = total_nnz; + } + else if (type == BROADCAST_COL) + { + tile_int(J->i, Jx->i, Jx->nnz, d2); + int offset = 0; + for (int i = 0; i < d2; i++) + { + for (int j = 0; j < d1; j++) + { + int nnz_in_row = Jx->p[j + 1] - Jx->p[j]; + J->p[i * d1 + j] = offset; + offset += nnz_in_row; + } + } + J->p[out_m] = total_nnz; + } + else /* BROADCAST_SCALAR */ + { + tile_int(J->i, Jx->i, Jx->nnz, out_m); + int row_nnz = Jx->nnz; + for (int i = 0; i < out_m; i++) + { + J->p[i] = i * row_nnz; + } + J->p[out_m] = total_nnz; + } + return new_sparse_matrix(J); +} + +static void sparse_broadcast_fill_values(matrix *self, broadcast_type type, int d1, + int d2, matrix *out) +{ + CSR_matrix *Jx = ((sparse_matrix *) self)->csr; + if (type == BROADCAST_ROW) + { + int 
acc = 0; + for (int i = 0; i < d2; i++) + { + int nnz_in_row = Jx->p[i + 1] - Jx->p[i]; + tile_double(out->x + acc, Jx->x + Jx->p[i], nnz_in_row, d1); + acc += nnz_in_row * d1; + } + } + else if (type == BROADCAST_COL) + { + tile_double(out->x, Jx->x, Jx->nnz, d2); + } + else /* BROADCAST_SCALAR */ + { + tile_double(out->x, Jx->x, Jx->nnz, d1 * d2); + } +} + +static matrix *sparse_diag_vec_alloc(matrix *self) +{ + CSR_matrix *Jx = ((sparse_matrix *) self)->csr; + int n = self->m; + int out_m = n * n; + CSR_matrix *J = new_CSR_matrix(out_m, self->n, Jx->nnz); + + int nnz = 0; + int next_diag = 0; + for (int row = 0; row < out_m; row++) + { + J->p[row] = nnz; + if (row == next_diag) + { + int child_row = row / (n + 1); + int len = Jx->p[child_row + 1] - Jx->p[child_row]; + memcpy(J->i + nnz, Jx->i + Jx->p[child_row], len * sizeof(int)); + nnz += len; + next_diag += n + 1; + } + } + J->p[out_m] = nnz; + J->nnz = nnz; + return new_sparse_matrix(J); +} + +static void sparse_diag_vec_fill_values(matrix *self, matrix *out) +{ + CSR_matrix *Jx = ((sparse_matrix *) self)->csr; + CSR_matrix *J = ((sparse_matrix *) out)->csr; + int n = self->m; + for (int i = 0; i < n; i++) + { + int out_row = i * (n + 1); + int len = J->p[out_row + 1] - J->p[out_row]; + memcpy(J->x + J->p[out_row], Jx->x + Jx->p[i], len * sizeof(double)); + } +} + +/* Build CSC_matrix structure on first call; refill values from csr->x on every call. 
*/ +static void sparse_refresh_csc_values(matrix *self) +{ + sparse_matrix *sm = (sparse_matrix *) self; + sparse_matrix_ensure_csc_cache(sm); + csr_to_csc_fill_values(sm->csr, sm->csc_cache, sm->csc_iwork); +} + +static void wire_vtable(sparse_matrix *sm) { - Sparse_Matrix *sm = (Sparse_Matrix *) SP_CALLOC(1, sizeof(Sparse_Matrix)); - sm->base.m = A->m; - sm->base.n = A->n; sm->base.block_left_mult_vec = sparse_block_left_mult_vec; sm->base.block_left_mult_sparsity = sparse_block_left_mult_sparsity; sm->base.block_left_mult_values = sparse_block_left_mult_values; - sm->base.update_values = sparse_update_values; + sm->base.copy_sparsity = sparse_copy_sparsity; + sm->base.DA_fill_values = sparse_DA_fill_values; + sm->base.ATA_alloc = sparse_ATA_alloc; + sm->base.ATDA_fill_values = sparse_ATDA_fill_values; + sm->base.to_csr = sparse_to_csr; + sm->base.transpose_alloc = sparse_transpose_alloc; + sm->base.transpose_fill_values = sparse_transpose_fill_values; + sm->base.index_alloc = sparse_index_alloc; + sm->base.index_fill_values = sparse_index_fill_values; + sm->base.promote_alloc = sparse_promote_alloc; + sm->base.promote_fill_values = sparse_promote_fill_values; + sm->base.broadcast_alloc = sparse_broadcast_alloc; + sm->base.broadcast_fill_values = sparse_broadcast_fill_values; + sm->base.diag_vec_alloc = sparse_diag_vec_alloc; + sm->base.diag_vec_fill_values = sparse_diag_vec_fill_values; + sm->base.refresh_csc_values = sparse_refresh_csc_values; sm->base.free_fn = sparse_free; - sm->csr = new_csr(A); +} + +matrix *new_sparse_matrix(CSR_matrix *A) +{ + sparse_matrix *sm = (sparse_matrix *) SP_CALLOC(1, sizeof(sparse_matrix)); + sm->base.m = A->m; + sm->base.n = A->n; + sm->base.nnz = A->nnz; + sm->base.x = A->x; + wire_vtable(sm); + sm->csr = A; return &sm->base; } -Matrix *sparse_matrix_trans(const Sparse_Matrix *self, int *iwork) +matrix *new_sparse_matrix_alloc(int m, int n, int nnz) +{ + return new_sparse_matrix(new_CSR_matrix(m, n, nnz)); +} + +matrix 
*sparse_matrix_trans(const sparse_matrix *self, int *iwork) { - CSR_Matrix *AT = transpose(self->csr, iwork); - Sparse_Matrix *sm = (Sparse_Matrix *) SP_CALLOC(1, sizeof(Sparse_Matrix)); + CSR_matrix *AT = transpose(self->csr, iwork); + sparse_matrix *sm = (sparse_matrix *) SP_CALLOC(1, sizeof(sparse_matrix)); sm->base.m = AT->m; sm->base.n = AT->n; - sm->base.block_left_mult_vec = sparse_block_left_mult_vec; - sm->base.block_left_mult_sparsity = sparse_block_left_mult_sparsity; - sm->base.block_left_mult_values = sparse_block_left_mult_values; - sm->base.update_values = sparse_update_values; - sm->base.free_fn = sparse_free; + sm->base.nnz = AT->nnz; + sm->base.x = AT->x; + wire_vtable(sm); sm->csr = AT; return &sm->base; } diff --git a/src/utils/utils.c b/src/utils/utils.c index 5163b3c..9d504a1 100644 --- a/src/utils/utils.c +++ b/src/utils/utils.c @@ -30,3 +30,22 @@ void sort_int_array(int *array, int size) { qsort(array, size, sizeof(int), compare_int_asc); } + +bool has_overlap(const int *a_idx, int a_len, const int *b_idx, int b_len, + int b_offset) +{ + int ai = 0, bi = 0; + while (ai < a_len && bi < b_len) + { + if (a_idx[ai] == b_idx[bi] - b_offset) return true; + if (a_idx[ai] < b_idx[bi] - b_offset) + { + ai++; + } + else + { + bi++; + } + } + return false; +} diff --git a/tests/all_tests.c b/tests/all_tests.c index 76807b6..853308f 100644 --- a/tests/all_tests.c +++ b/tests/all_tests.c @@ -55,17 +55,20 @@ #include "jacobian_tests/other/test_prod_axis_zero.h" #include "jacobian_tests/other/test_quad_form.h" #include "numerical_diff/test_numerical_diff.h" +#include "old-code/test_old_permuted_dense.h" #include "problem/test_param_broadcast.h" #include "problem/test_param_prob.h" #include "problem/test_problem.h" #include "utils/test_cblas.h" -#include "utils/test_coo_matrix.h" +#include "utils/test_COO_matrix.h" #include "utils/test_csc_matrix.h" #include "utils/test_csr_csc_conversion.h" #include "utils/test_csr_matrix.h" #include 
"utils/test_linalg_sparse_matmuls.h" #include "utils/test_linalg_utils_matmul_chain_rule.h" #include "utils/test_matrix.h" +#include "utils/test_matrix_BTA.h" +#include "utils/test_permuted_dense.h" #include "wsum_hess/affine/test_broadcast.h" #include "wsum_hess/affine/test_convolve.h" #include "wsum_hess/affine/test_diag_mat.h" @@ -102,7 +105,10 @@ #endif /* PROFILE_ONLY */ #ifdef PROFILE_ONLY +#include "profiling/profile_BTA_pd_csr_vs_csc.h" #include "profiling/profile_left_matmul.h" +#include "profiling/profile_log_reg.h" +#include "profiling/profile_trimmed_log_reg.h" #endif /* PROFILE_ONLY */ int main(void) @@ -218,12 +224,15 @@ int main(void) mu_run_test(test_jacobian_left_matmul_log, tests_run); mu_run_test(test_jacobian_left_matmul_log_matrix, tests_run); mu_run_test(test_jacobian_left_matmul_exp_composite, tests_run); + mu_run_test(test_jacobian_left_matmul_pd_from_composite_child, tests_run); + mu_run_test(test_jacobian_left_matmul_pd_param, tests_run); mu_run_test(test_jacobian_right_matmul_log, tests_run); mu_run_test(test_jacobian_right_matmul_log_vector, tests_run); mu_run_test(test_jacobian_matmul, tests_run); mu_run_test(test_jacobian_convolve, tests_run); mu_run_test(test_jacobian_convolve_composite, tests_run); mu_run_test(test_jacobian_transpose, tests_run); + mu_run_test(test_jacobian_transpose_pd_preserved, tests_run); mu_run_test(test_diag_mat_jacobian_variable, tests_run); mu_run_test(test_diag_mat_jacobian_of_log, tests_run); mu_run_test(test_upper_tri_jacobian_variable, tests_run); @@ -284,6 +293,7 @@ int main(void) mu_run_test(test_wsum_hess_left_matmul, tests_run); mu_run_test(test_wsum_hess_left_matmul_matrix, tests_run); mu_run_test(test_wsum_hess_left_matmul_exp_composite, tests_run); + mu_run_test(test_wsum_hess_left_matmul_dense_matrix_exp, tests_run); mu_run_test(test_wsum_hess_matmul, tests_run); mu_run_test(test_wsum_hess_matmul_yx, tests_run); mu_run_test(test_wsum_hess_right_matmul, tests_run); @@ -351,11 +361,48 @@ int 
main(void) mu_run_test(test_csr_to_coo, tests_run); mu_run_test(test_csr_to_coo_lower_triangular, tests_run); mu_run_test(test_refresh_lower_triangular_coo, tests_run); - mu_run_test(test_dense_matrix_mult_vec, tests_run); - mu_run_test(test_dense_matrix_mult_vec_blocks, tests_run); - mu_run_test(test_sparse_vs_dense_mult_vec, tests_run); - mu_run_test(test_dense_matrix_trans, tests_run); - mu_run_test(test_sparse_vs_dense_mult_vec_blocks, tests_run); + mu_run_test(test_pd_mult_vec_basic, tests_run); + mu_run_test(test_pd_mult_vec_blocks, tests_run); + mu_run_test(test_sparse_vs_pd_mult_vec, tests_run); + mu_run_test(test_pd_trans_full_block, tests_run); + mu_run_test(test_sparse_vs_pd_mult_vec_blocks, tests_run); + mu_run_test(test_pd_operator_block_left_mult_vec, tests_run); + mu_run_test(test_permuted_dense_to_csr_basic, tests_run); + mu_run_test(test_permuted_dense_to_csr_empty, tests_run); + mu_run_test(test_permuted_dense_to_csr_full, tests_run); + mu_run_test(test_permuted_dense_to_csr_single_row, tests_run); + mu_run_test(test_permuted_dense_to_csr_single_col, tests_run); + mu_run_test(test_DA_pd_fill_values, tests_run); + mu_run_test(test_ATA_pd_alloc, tests_run); + mu_run_test(test_ATDA_pd_fill_values, tests_run); + mu_run_test(test_permuted_dense_times_csc, tests_run); + mu_run_test(test_permuted_dense_times_csc_no_active, tests_run); + mu_run_test(test_permuted_dense_to_csr_lazy, tests_run); + mu_run_test(test_permuted_dense_col_inv, tests_run); + mu_run_test(test_permuted_dense_index, tests_run); + mu_run_test(test_permuted_dense_promote, tests_run); + mu_run_test(test_permuted_dense_broadcast_scalar, tests_run); + mu_run_test(test_permuted_dense_broadcast_row, tests_run); + mu_run_test(test_permuted_dense_broadcast_col, tests_run); + mu_run_test(test_permuted_dense_diag_vec, tests_run); + mu_run_test(test_permuted_dense_BTA_matching_row_perm, tests_run); + mu_run_test(test_permuted_dense_BTA_empty_overlap, tests_run); + 
mu_run_test(test_permuted_dense_BTA_partial_overlap, tests_run); + mu_run_test(test_permuted_dense_BTDA_decomposition, tests_run); + mu_run_test(test_BTA_pd_csc_matches_csr, tests_run); + mu_run_test(test_BA_pd_matrices_pd_pd_full_block_B, tests_run); + mu_run_test(test_BA_pd_matrices_pd_pd_general_B, tests_run); + mu_run_test(test_BA_pd_matrices_pd_csc, tests_run); + mu_run_test(test_BA_pd_matrices_fast_path, tests_run); + mu_run_test(test_BTA_pd_csr_basic, tests_run); + mu_run_test(test_BTA_pd_csr_leaf_variable, tests_run); + mu_run_test(test_BTA_pd_csr_no_overlap, tests_run); + mu_run_test(test_BTA_csr_pd_basic, tests_run); + mu_run_test(test_BTA_csr_pd_leaf_variable, tests_run); + mu_run_test(test_BTA_csr_pd_no_overlap, tests_run); + mu_run_test(test_BTDA_matrices_pd_pd, tests_run); + mu_run_test(test_BTDA_matrices_csr_pd, tests_run); + mu_run_test(test_BTDA_matrices_pd_csr, tests_run); mu_run_test(test_YT_kron_I, tests_run); mu_run_test(test_YT_kron_I_larger, tests_run); mu_run_test(test_I_kron_X, tests_run); @@ -401,6 +448,9 @@ int main(void) #ifdef PROFILE_ONLY printf("\n--- Profiling Tests ---\n"); mu_run_test(profile_left_matmul, tests_run); + mu_run_test(profile_log_reg, tests_run); + mu_run_test(profile_trimmed_log_reg, tests_run); + mu_run_test(profile_BTA_pd_csr_vs_csc, tests_run); #endif /* PROFILE_ONLY */ printf("\n=== All %d tests passed ===\n", tests_run); diff --git a/tests/forward_pass/affine/test_add.h b/tests/forward_pass/affine/test_add.h index 1f8a61a..4b14ae3 100644 --- a/tests/forward_pass/affine/test_add.h +++ b/tests/forward_pass/affine/test_add.h @@ -5,6 +5,7 @@ #include "atoms/affine.h" #include "expr.h" #include "minunit.h" +#include "subexpr.h" #include "test_helpers.h" const char *test_addition(void) diff --git a/tests/forward_pass/affine/test_diag_mat.h b/tests/forward_pass/affine/test_diag_mat.h index cb20a05..d5703ac 100644 --- a/tests/forward_pass/affine/test_diag_mat.h +++ b/tests/forward_pass/affine/test_diag_mat.h @@ -8,7 +8,7 
@@ const char *test_diag_mat_forward(void) { /* 3x3 matrix variable (column-major): [1,2,3,4,5,6,7,8,9] - * Matrix: 1 4 7 + * matrix: 1 4 7 * 2 5 8 * 3 6 9 * Diagonal: (0,0)=1, (1,1)=5, (2,2)=9 */ diff --git a/tests/forward_pass/affine/test_linear_op.h b/tests/forward_pass/affine/test_linear_op.h index 801a403..8a8f9f8 100644 --- a/tests/forward_pass/affine/test_linear_op.h +++ b/tests/forward_pass/affine/test_linear_op.h @@ -11,14 +11,14 @@ const char *test_linear_op(void) { - /* create CSR matrix + /* create CSR_matrix matrix A = [0 0 2 3 0 0] [0 0 1 0 2 0] [0 0 3 4 5 0] */ double Ax[7] = {2.0, 3.0, 1.0, 2.0, 3.0, 4.0, 5.0}; int Ai[7] = {2, 3, 2, 4, 2, 3, 4}; int Ap[4] = {0, 2, 4, 7}; - CSR_Matrix *A = new_csr_matrix(3, 6, 7); + CSR_matrix *A = new_CSR_matrix(3, 6, 7); memcpy(A->x, Ax, 7 * sizeof(double)); memcpy(A->i, Ai, 7 * sizeof(int)); memcpy(A->p, Ap, 4 * sizeof(int)); @@ -31,6 +31,6 @@ const char *test_linear_op(void) double expected[3] = {8, 7, 26}; mu_assert("fail", cmp_double_array(linear_node->value, expected, 3)); free_expr(linear_node); - free_csr_matrix(A); + free_CSR_matrix(A); return 0; } diff --git a/tests/forward_pass/affine/test_upper_tri.h b/tests/forward_pass/affine/test_upper_tri.h index 056c3a5..3db43e8 100644 --- a/tests/forward_pass/affine/test_upper_tri.h +++ b/tests/forward_pass/affine/test_upper_tri.h @@ -8,7 +8,7 @@ const char *test_upper_tri_forward_4x4(void) { /* 4x4 matrix variable (column-major): [1..16] - * Matrix: 1 5 9 13 + * matrix: 1 5 9 13 * 2 6 10 14 * 3 7 11 15 * 4 8 12 16 diff --git a/tests/jacobian_tests/affine/test_broadcast.h b/tests/jacobian_tests/affine/test_broadcast.h index f41082d..a1f79ed 100644 --- a/tests/jacobian_tests/affine/test_broadcast.h +++ b/tests/jacobian_tests/affine/test_broadcast.h @@ -37,12 +37,9 @@ const char *test_broadcast_row_jacobian(void) int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; int expected_i[6] = {0, 0, 1, 1, 2, 2}; - mu_assert("broadcast row jacobian vals fail", - 
cmp_double_array(bcast->jacobian->x, expected_x, 6)); - mu_assert("broadcast row jacobian rows fail", - cmp_int_array(bcast->jacobian->p, expected_p, 4)); - mu_assert("broadcast row jacobian cols fail", - cmp_int_array(bcast->jacobian->i, expected_i, 6)); + mu_assert("vals fail", cmp_values(bcast->jacobian, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(bcast->jacobian, expected_p, expected_i, 6, 6)); free_expr(bcast); return 0; @@ -82,12 +79,9 @@ const char *test_broadcast_col_jacobian(void) int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; int expected_i[6] = {0, 1, 2, 0, 1, 2}; - mu_assert("broadcast col jacobian vals fail", - cmp_double_array(bcast->jacobian->x, expected_x, 6)); - mu_assert("broadcast col jacobian rows fail", - cmp_int_array(bcast->jacobian->p, expected_p, 7)); - mu_assert("broadcast col jacobian cols fail", - cmp_int_array(bcast->jacobian->i, expected_i, 6)); + mu_assert("vals fail", cmp_values(bcast->jacobian, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(bcast->jacobian, expected_p, expected_i, 6, 6)); free_expr(bcast); return 0; @@ -123,12 +117,9 @@ const char *test_broadcast_scalar_to_matrix_jacobian(void) int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; int expected_i[6] = {0, 0, 0, 0, 0, 0}; - mu_assert("broadcast scalar jacobian vals fail", - cmp_double_array(bcast->jacobian->x, expected_x, 6)); - mu_assert("broadcast scalar jacobian rows fail", - cmp_int_array(bcast->jacobian->p, expected_p, 7)); - mu_assert("broadcast scalar jacobian cols fail", - cmp_int_array(bcast->jacobian->i, expected_i, 6)); + mu_assert("vals fail", cmp_values(bcast->jacobian, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(bcast->jacobian, expected_p, expected_i, 6, 6)); free_expr(bcast); return 0; @@ -150,17 +141,18 @@ const char *test_double_broadcast(void) jacobian_init(sum); sum->eval_jacobian(sum); + /* TODO: what is this test? 
*/ + /* All 6 elements depend on the single input variable */ // double expected_x[6] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; // int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; // int expected_i[6] = {0, 0, 0, 0, 0, 0}; // - // mu_assert("broadcast scalar jacobian vals fail", - // cmp_double_array(sum->jacobian->x, expected_x, 6)); - // mu_assert("broadcast scalar jacobian rows fail", - // cmp_int_array(sum ->jacobian->p, expected_p, 7)); - // mu_assert("broadcast scalar jacobian cols fail", - // cmp_int_array(bcast->jacobian->i, expected_i, 6)); + // mu_assert("broadcast scalar jacobian vals fail", // + // cmp_double_array(sum->jacobian->x, expected_x, 6)); mu_assert("broadcast + // scalar jacobian rows fail", // cmp_int_array(sum ->jacobian->p, expected_p, + // 7)); mu_assert("broadcast scalar jacobian cols fail", // + // cmp_int_array(bcast->jacobian->to_csr(bcast->jacobian)->i, // expected_i, 6)); free_expr(sum); return 0; diff --git a/tests/jacobian_tests/affine/test_convolve.h b/tests/jacobian_tests/affine/test_convolve.h index c3aae98..0eb4db1 100644 --- a/tests/jacobian_tests/affine/test_convolve.h +++ b/tests/jacobian_tests/affine/test_convolve.h @@ -18,7 +18,7 @@ const char *test_jacobian_convolve(void) * [3, 2, 1], * [0, 3, 2], * [0, 0, 3]] - * stored in CSR with nnz = 9, shape 5 x 3. */ + * stored in CSR_matrix with nnz = 9, shape 5 x 3. 
*/ double kernel[3] = {1.0, 2.0, 3.0}; expr *kernel_param = new_parameter(3, 1, PARAM_FIXED, 3, kernel); expr *x = new_variable(3, 1, 0, 3); @@ -37,12 +37,9 @@ const char *test_jacobian_convolve(void) int expected_i[9] = {0, 0, 1, 0, 1, 2, 1, 2, 2}; double expected_x[9] = {1.0, 2.0, 1.0, 3.0, 2.0, 1.0, 3.0, 2.0, 3.0}; - mu_assert("Convolve Jacobian row pointers incorrect", - cmp_int_array(y->jacobian->p, expected_p, 6)); - mu_assert("Convolve Jacobian column indices incorrect", - cmp_int_array(y->jacobian->i, expected_i, 9)); - mu_assert("Convolve Jacobian values incorrect", - cmp_double_array(y->jacobian->x, expected_x, 9)); + mu_assert("sparsity fail", + cmp_sparsity(y->jacobian, expected_p, expected_i, 5, 9)); + mu_assert("vals fail", cmp_values(y->jacobian, expected_x, 9)); free_expr(y); return 0; diff --git a/tests/jacobian_tests/affine/test_diag_mat.h b/tests/jacobian_tests/affine/test_diag_mat.h index b1031ac..9b669a0 100644 --- a/tests/jacobian_tests/affine/test_diag_mat.h +++ b/tests/jacobian_tests/affine/test_diag_mat.h @@ -10,7 +10,7 @@ const char *test_diag_mat_jacobian_variable(void) { /* diag_mat of a 2x2 variable (4 vars total) * Diagonal indices in column-major: [0, 3] - * Jacobian is 2x4 CSR: row 0 has col 0, row 1 has col 3 */ + * Jacobian is 2x4 CSR_matrix: row 0 has col 0, row 1 has col 3 */ double u[4] = {1.0, 2.0, 3.0, 4.0}; expr *var = new_variable(2, 2, 0, 4); expr *dm = new_diag_mat(var); @@ -23,9 +23,9 @@ const char *test_diag_mat_jacobian_variable(void) int expected_p[3] = {0, 1, 2}; int expected_i[2] = {0, 3}; - mu_assert("diag_mat jac vals", cmp_double_array(dm->jacobian->x, expected_x, 2)); - mu_assert("diag_mat jac p", cmp_int_array(dm->jacobian->p, expected_p, 3)); - mu_assert("diag_mat jac i", cmp_int_array(dm->jacobian->i, expected_i, 2)); + mu_assert("vals fail", cmp_values(dm->jacobian, expected_x, 2)); + mu_assert("sparsity fail", + cmp_sparsity(dm->jacobian, expected_p, expected_i, 2, 2)); free_expr(dm); return 0; @@ -49,12 
+49,12 @@ const char *test_diag_mat_jacobian_of_log(void) dm->eval_jacobian(dm); double expected_x[2] = {1.0, 0.25}; + int expected_p[3] = {0, 1, 2}; int expected_i[2] = {0, 3}; - mu_assert("diag_mat log jac vals", - cmp_double_array(dm->jacobian->x, expected_x, 2)); - mu_assert("diag_mat log jac cols", - cmp_int_array(dm->jacobian->i, expected_i, 2)); + mu_assert("vals fail", cmp_values(dm->jacobian, expected_x, 2)); + mu_assert("sparsity fail", + cmp_sparsity(dm->jacobian, expected_p, expected_i, 2, 2)); free_expr(dm); return 0; diff --git a/tests/jacobian_tests/affine/test_hstack.h b/tests/jacobian_tests/affine/test_hstack.h index a5b163c..017730e 100644 --- a/tests/jacobian_tests/affine/test_hstack.h +++ b/tests/jacobian_tests/affine/test_hstack.h @@ -42,9 +42,9 @@ const char *test_jacobian_hstack_vectors(void) int expected_Ai[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; int expected_Ap[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - mu_assert("vals fail", cmp_double_array(stack->jacobian->x, expected_Ax, 9)); - mu_assert("cols fail", cmp_int_array(stack->jacobian->i, expected_Ai, 9)); - mu_assert("rows fail", cmp_int_array(stack->jacobian->p, expected_Ap, 10)); + mu_assert("vals fail", cmp_values(stack->jacobian, expected_Ax, 9)); + mu_assert("sparsity fail", + cmp_sparsity(stack->jacobian, expected_Ap, expected_Ai, 9, 9)); free_expr(stack); return 0; @@ -86,9 +86,9 @@ const char *test_jacobian_hstack_matrix(void) int expected_Ap[19] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18}; - mu_assert("vals fail", cmp_double_array(stack->jacobian->x, expected_Ax, 18)); - mu_assert("cols fail", cmp_int_array(stack->jacobian->i, expected_Ai, 18)); - mu_assert("rows fail", cmp_int_array(stack->jacobian->p, expected_Ap, 19)); + mu_assert("vals fail", cmp_values(stack->jacobian, expected_Ax, 18)); + mu_assert("sparsity fail", + cmp_sparsity(stack->jacobian, expected_Ap, expected_Ai, 18, 18)); free_expr(stack); return 0; diff --git 
a/tests/jacobian_tests/affine/test_index.h b/tests/jacobian_tests/affine/test_index.h index 8cdd9f7..eeda8c2 100644 --- a/tests/jacobian_tests/affine/test_index.h +++ b/tests/jacobian_tests/affine/test_index.h @@ -53,12 +53,12 @@ const char *test_index_jacobian_of_variable(void) /* Jacobian is 2x3 with pattern: row 0 selects col 0, row 1 selects col 2 */ double expected_x[2] = {1.0, 1.0}; - int expected_p[3] = {0, 1, 2}; /* CSR row ptrs */ + int expected_p[3] = {0, 1, 2}; /* CSR_matrix row ptrs */ int expected_i[2] = {0, 2}; /* column indices */ - mu_assert("index jac vals", cmp_double_array(idx->jacobian->x, expected_x, 2)); - mu_assert("index jac p", cmp_int_array(idx->jacobian->p, expected_p, 3)); - mu_assert("index jac i", cmp_int_array(idx->jacobian->i, expected_i, 2)); + mu_assert("vals fail", cmp_values(idx->jacobian, expected_x, 2)); + mu_assert("sparsity fail", + cmp_sparsity(idx->jacobian, expected_p, expected_i, 2, 2)); free_expr(idx); return 0; @@ -76,16 +76,17 @@ const char *test_index_jacobian_of_log(void) jacobian_init(idx); idx->eval_jacobian(idx); - /* d/dx log(x) = diag(1/x), then select rows 0 and 2 + /* d/dx log(x) = diag(1/x), then select rows 0 and 2. + * Each selected row has exactly one nonzero (the diagonal entry). 
* Row 0: 1/1 = 1.0 at col 0 * Row 1: 1/4 = 0.25 at col 2 */ double expected_x[2] = {1.0, 0.25}; + int expected_p[3] = {0, 1, 2}; int expected_i[2] = {0, 2}; - mu_assert("index of log jac vals", - cmp_double_array(idx->jacobian->x, expected_x, 2)); - mu_assert("index of log jac cols", - cmp_int_array(idx->jacobian->i, expected_i, 2)); + mu_assert("vals fail", cmp_values(idx->jacobian, expected_x, 2)); + mu_assert("sparsity fail", + cmp_sparsity(idx->jacobian, expected_p, expected_i, 2, 2)); free_expr(idx); return 0; @@ -107,12 +108,9 @@ const char *test_index_jacobian_repeated(void) int expected_p[3] = {0, 1, 2}; int expected_i[2] = {0, 0}; /* Both reference col 0 */ - mu_assert("index repeated jac vals", - cmp_double_array(idx->jacobian->x, expected_x, 2)); - mu_assert("index repeated row ptr", - cmp_int_array(idx->jacobian->p, expected_p, 3)); - mu_assert("index repeated jac i", - cmp_int_array(idx->jacobian->i, expected_i, 2)); + mu_assert("vals fail", cmp_values(idx->jacobian, expected_x, 2)); + mu_assert("sparsity fail", + cmp_sparsity(idx->jacobian, expected_p, expected_i, 2, 2)); free_expr(idx); return 0; @@ -133,12 +131,14 @@ const char *test_sum_of_index(void) jacobian_init(s); s->eval_jacobian(s); - /* Gradient: [1, 0, 1] in sparse form */ + /* Gradient: [1, 0, 1] in sparse form. Single output row holds both nnz. 
*/ double expected_x[2] = {1.0, 1.0}; + int expected_p[2] = {0, 2}; int expected_i[2] = {0, 2}; - mu_assert("sum of index vals", cmp_double_array(s->jacobian->x, expected_x, 2)); - mu_assert("sum of index cols", cmp_int_array(s->jacobian->i, expected_i, 2)); + mu_assert("vals fail", cmp_values(s->jacobian, expected_x, 2)); + mu_assert("sparsity fail", + cmp_sparsity(s->jacobian, expected_p, expected_i, 1, 2)); free_expr(s); return 0; diff --git a/tests/jacobian_tests/affine/test_left_matmul.h b/tests/jacobian_tests/affine/test_left_matmul.h index a0c1385..18a27c8 100644 --- a/tests/jacobian_tests/affine/test_left_matmul.h +++ b/tests/jacobian_tests/affine/test_left_matmul.h @@ -8,6 +8,7 @@ #include "minunit.h" #include "numerical_diff.h" #include "test_helpers.h" +#include "utils/permuted_dense.h" const char *test_jacobian_left_matmul_log(void) { @@ -23,7 +24,7 @@ const char *test_jacobian_left_matmul_log(void) * [5, 0, 2 ] * [7, 0, 0 ] * - * Stored in CSR format (4x3 sparse): + * Stored in CSR_matrix format (4x3 sparse): * nnz = 7 * p = [0, 2, 4, 6, 7] * i = [0, 2, 0, 2, 0, 2, 0] @@ -32,8 +33,8 @@ const char *test_jacobian_left_matmul_log(void) double x_vals[3] = {1.0, 2.0, 3.0}; expr *x = new_variable(3, 1, 0, 3); - /* Create sparse matrix A in CSR format */ - CSR_Matrix *A = new_csr_matrix(4, 3, 7); + /* Create sparse matrix A in CSR_matrix format */ + CSR_matrix *A = new_CSR_matrix(4, 3, 7); int A_p[5] = {0, 2, 4, 6, 7}; int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; @@ -61,11 +62,11 @@ const char *test_jacobian_left_matmul_log(void) int expected_Ai[7] = {0, 2, 0, 2, 0, 2, 0}; int expected_Ap[5] = {0, 2, 4, 6, 7}; - mu_assert("vals fail", cmp_double_array(A_log_x->jacobian->x, expected_Ax, 7)); - mu_assert("cols fail", cmp_int_array(A_log_x->jacobian->i, expected_Ai, 7)); - mu_assert("rows fail", cmp_int_array(A_log_x->jacobian->p, expected_Ap, 5)); + mu_assert("vals fail", cmp_values(A_log_x->jacobian, expected_Ax, 
7)); + mu_assert("sparsity fail", + cmp_sparsity(A_log_x->jacobian, expected_Ap, expected_Ai, 4, 7)); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(A_log_x); return 0; } @@ -76,8 +77,8 @@ const char *test_jacobian_left_matmul_log_matrix(void) double x_vals[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; expr *x = new_variable(3, 2, 0, 6); - /* Create sparse matrix A in CSR format (4x3) */ - CSR_Matrix *A = new_csr_matrix(4, 3, 7); + /* Create sparse matrix A in CSR_matrix format (4x3) */ + CSR_matrix *A = new_CSR_matrix(4, 3, 7); int A_p[5] = {0, 2, 4, 6, 7}; int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; @@ -100,11 +101,11 @@ const char *test_jacobian_left_matmul_log_matrix(void) int expected_Ai[14] = {0, 2, 0, 2, 0, 2, 0, 3, 5, 3, 5, 3, 5, 3}; int expected_Ap[9] = {0, 2, 4, 6, 7, 9, 11, 13, 14}; - mu_assert("vals fail", cmp_double_array(A_log_x->jacobian->x, expected_Ax, 14)); - mu_assert("cols fail", cmp_int_array(A_log_x->jacobian->i, expected_Ai, 14)); - mu_assert("rows fail", cmp_int_array(A_log_x->jacobian->p, expected_Ap, 9)); + mu_assert("vals fail", cmp_values(A_log_x->jacobian, expected_Ax, 14)); + mu_assert("sparsity fail", + cmp_sparsity(A_log_x->jacobian, expected_Ap, expected_Ai, 8, 14)); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(A_log_x); return 0; } @@ -116,7 +117,7 @@ const char *test_jacobian_left_matmul_exp_composite(void) expr *x = new_variable(3, 1, 0, 3); /* Create B matrix (3x3 all ones) */ - CSR_Matrix *B = new_csr_matrix(3, 3, 9); + CSR_matrix *B = new_CSR_matrix(3, 3, 9); int B_p[4] = {0, 3, 6, 9}; int B_i[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; double B_x[9] = {1, 1, 1, 1, 1, 1, 1, 1, 1}; @@ -125,7 +126,7 @@ const char *test_jacobian_left_matmul_exp_composite(void) memcpy(B->x, B_x, 9 * sizeof(double)); /* Create A matrix */ - CSR_Matrix *A = new_csr_matrix(4, 3, 7); + CSR_matrix *A = new_CSR_matrix(4, 3, 7); int A_p[5] = {0, 2, 4, 6, 7}; int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; double A_x[7] = {1, 
2, 3, 4, 5, 6, 7}; @@ -140,8 +141,122 @@ const char *test_jacobian_left_matmul_exp_composite(void) mu_assert("check_jacobian failed", check_jacobian_num(A_exp_Bx, x_vals, NUMERICAL_DIFF_DEFAULT_H)); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); free_expr(A_exp_Bx); return 0; } + +/* outer = A2 @ (A1 @ x). Inner left_matmul produces a PD Jacobian via the + leaf-var fast path. Outer left_matmul sees a PD child Jacobian and must + fire the produce_pd_jacobian_from_child branch via BA_pd_matrices_*. + + x is a length-2 leaf variable at var_id=0, n_vars=2. + A1 is 3x2: [[1,2],[3,4],[5,6]] (row-major). + A2 is 4x3: [[1,0,1],[0,1,0],[1,0,1],[0,1,0]] (row-major). + Expected outer->jacobian: PD of shape (4, 2), row_perm=[0..3], + col_perm=[0,1], X = A2 @ A1 = [[6,8],[3,4],[6,8],[3,4]]. */ +const char *test_jacobian_left_matmul_pd_from_composite_child(void) +{ + double A1_data[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + double A2_data[12] = {1.0, 0.0, 1.0, 0.0, 1.0, 0.0, + 1.0, 0.0, 1.0, 0.0, 1.0, 0.0}; + + expr *x = new_variable(2, 1, 0, 2); + expr *A1_x = new_left_matmul_dense(NULL, x, 3, 2, A1_data); + expr *A2_A1_x = new_left_matmul_dense(NULL, A1_x, 4, 3, A2_data); + + double x_vals[2] = {0.5, -1.5}; + A2_A1_x->forward(A2_A1_x, x_vals); + jacobian_init(A2_A1_x); + A2_A1_x->eval_jacobian(A2_A1_x); + + /* Structural: outer's Jacobian must be PD (produced by the + jacobian_init_pd path via BA_pd_matrices_alloc). 
*/ + mu_assert("outer Jacobian should be PD", A2_A1_x->jacobian->is_permuted_dense); + permuted_dense *pd = (permuted_dense *) A2_A1_x->jacobian; + mu_assert("global m", A2_A1_x->jacobian->m == 4); + mu_assert("global n", A2_A1_x->jacobian->n == 2); + mu_assert("m0", pd->m0 == 4); + mu_assert("n0", pd->n0 == 2); + int expected_row_perm[4] = {0, 1, 2, 3}; + int expected_col_perm[2] = {0, 1}; + mu_assert("row_perm", cmp_int_array(pd->row_perm, expected_row_perm, 4)); + mu_assert("col_perm", cmp_int_array(pd->col_perm, expected_col_perm, 2)); + + /* Numerical: X = A2 @ A1 (row-major 4x2). */ + double expected_X[8] = {6.0, 8.0, 3.0, 4.0, 6.0, 8.0, 3.0, 4.0}; + mu_assert("X values", cmp_double_array(pd->X, expected_X, 8)); + + /* Cross-check against numerical differentiation for paranoia. */ + mu_assert("check_jacobian failed", + check_jacobian_num(A2_A1_x, x_vals, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(A2_A1_x); + return 0; +} + +/* Parameterized A: A_param @ x with leaf-variable x and n_blocks == 1. + Verifies the PD path (jacobian_init_pd / eval_jacobian_pd) handles a + parameterized A — the structure is fixed at construction, refresh_dense_left + updates A->X before each forward, and eval_jacobian_pd reads those values + via BA_pd_matrices_fill_values. + + x is length 2 at var_id=0, n_vars=2. + A_param is 3x2. The parameter convention is column-major, so the param's + value array is column-major of A. We test two parameter assignments: + A = [[1,2],[3,4],[5,6]] column-major: [1,3,5,2,4,6] + A = [[7,8],[9,10],[11,12]] column-major: [7,9,11,8,10,12] + The Jacobian of A @ x w.r.t. x is just A itself, placed at the + variable's column slot. 
*/ +const char *test_jacobian_left_matmul_pd_param(void) +{ + expr *x = new_variable(2, 1, 0, 2); + + double theta[6] = {1.0, 3.0, 5.0, 2.0, 4.0, 6.0}; + expr *A_param = new_parameter(3, 2, 0, 2, theta); + expr *A_x = new_left_matmul_dense(A_param, x, 3, 2, NULL); + + double x_vals[2] = {0.5, -1.5}; + A_x->forward(A_x, x_vals); + jacobian_init(A_x); + A_x->eval_jacobian(A_x); + + /* Structural: Jacobian must be PD. */ + mu_assert("Jacobian should be PD", A_x->jacobian->is_permuted_dense); + permuted_dense *pd = (permuted_dense *) A_x->jacobian; + mu_assert("global m", A_x->jacobian->m == 3); + mu_assert("global n", A_x->jacobian->n == 2); + mu_assert("m0", pd->m0 == 3); + mu_assert("n0", pd->n0 == 2); + int expected_row_perm[3] = {0, 1, 2}; + int expected_col_perm[2] = {0, 1}; + mu_assert("row_perm", cmp_int_array(pd->row_perm, expected_row_perm, 3)); + mu_assert("col_perm", cmp_int_array(pd->col_perm, expected_col_perm, 2)); + + /* Values: pd->X is row-major of A. */ + double expected_X1[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + mu_assert("X values (param 1)", cmp_double_array(pd->X, expected_X1, 6)); + + mu_assert("numerical check (param 1)", + check_jacobian_num(A_x, x_vals, NUMERICAL_DIFF_DEFAULT_H)); + + /* Update the parameter and re-evaluate. The PD output struct is reused; + only pd->X should change. 
*/ + double theta2[6] = {7.0, 9.0, 11.0, 8.0, 10.0, 12.0}; + memcpy(A_param->value, theta2, 6 * sizeof(double)); + expr_set_needs_refresh(A_x); + + A_x->forward(A_x, x_vals); + A_x->eval_jacobian(A_x); + + mu_assert("Jacobian still PD after refresh", A_x->jacobian->is_permuted_dense); + double expected_X2[6] = {7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + mu_assert("X values (param 2)", cmp_double_array(pd->X, expected_X2, 6)); + + mu_assert("numerical check (param 2)", + check_jacobian_num(A_x, x_vals, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(A_x); + return 0; +} diff --git a/tests/jacobian_tests/affine/test_neg.h b/tests/jacobian_tests/affine/test_neg.h index ca4e595..f4d859c 100644 --- a/tests/jacobian_tests/affine/test_neg.h +++ b/tests/jacobian_tests/affine/test_neg.h @@ -19,12 +19,9 @@ const char *test_neg_jacobian(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("neg jacobian vals fail", - cmp_double_array(neg_node->jacobian->x, expected_x, 3)); - mu_assert("neg jacobian rows fail", - cmp_int_array(neg_node->jacobian->p, expected_p, 4)); - mu_assert("neg jacobian cols fail", - cmp_int_array(neg_node->jacobian->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(neg_node->jacobian, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(neg_node->jacobian, expected_p, expected_i, 3, 3)); free_expr(neg_node); return 0; @@ -50,12 +47,9 @@ const char *test_neg_chain(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("neg chain jacobian vals fail", - cmp_double_array(neg2->jacobian->x, expected_x, 3)); - mu_assert("neg chain jacobian rows fail", - cmp_int_array(neg2->jacobian->p, expected_p, 4)); - mu_assert("neg chain jacobian cols fail", - cmp_int_array(neg2->jacobian->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(neg2->jacobian, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(neg2->jacobian, expected_p, expected_i, 3, 3)); free_expr(neg2); return 0; diff --git 
a/tests/jacobian_tests/affine/test_promote.h b/tests/jacobian_tests/affine/test_promote.h index ad2ab49..c0e31ed 100644 --- a/tests/jacobian_tests/affine/test_promote.h +++ b/tests/jacobian_tests/affine/test_promote.h @@ -22,12 +22,9 @@ const char *test_promote_scalar_jacobian(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 0, 0}; - mu_assert("promote jacobian vals fail", - cmp_double_array(promote_node->jacobian->x, expected_x, 3)); - mu_assert("promote jacobian rows fail", - cmp_int_array(promote_node->jacobian->p, expected_p, 4)); - mu_assert("promote jacobian cols fail", - cmp_int_array(promote_node->jacobian->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(promote_node->jacobian, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(promote_node->jacobian, expected_p, expected_i, 3, 3)); free_expr(promote_node); return 0; @@ -54,12 +51,9 @@ const char *test_promote_scalar_to_matrix_jacobian(void) int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; int expected_i[6] = {0, 0, 0, 0, 0, 0}; - mu_assert("promote matrix jacobian vals fail", - cmp_double_array(promote_node->jacobian->x, expected_x, 6)); - mu_assert("promote matrix jacobian rows fail", - cmp_int_array(promote_node->jacobian->p, expected_p, 7)); - mu_assert("promote matrix jacobian cols fail", - cmp_int_array(promote_node->jacobian->i, expected_i, 6)); + mu_assert("vals fail", cmp_values(promote_node->jacobian, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(promote_node->jacobian, expected_p, expected_i, 6, 6)); free_expr(promote_node); return 0; diff --git a/tests/jacobian_tests/affine/test_right_matmul.h b/tests/jacobian_tests/affine/test_right_matmul.h index 8c2fa04..4259172 100644 --- a/tests/jacobian_tests/affine/test_right_matmul.h +++ b/tests/jacobian_tests/affine/test_right_matmul.h @@ -17,8 +17,8 @@ const char *test_jacobian_right_matmul_log(void) double x_vals[4] = {1.0, 3.0, 2.0, 4.0}; // column-wise vectorization expr *x = new_variable(2, 2, 0, 4); - /* 
Create sparse matrix A in CSR format (2x3) */ - CSR_Matrix *A = new_csr_matrix(2, 3, 4); + /* Create sparse matrix A in CSR_matrix format (2x3) */ + CSR_matrix *A = new_CSR_matrix(2, 3, 4); int A_p[3] = {0, 2, 4}; int A_i[4] = {0, 2, 0, 2}; double A_x[4] = {1.0, 2.0, 3.0, 4.0}; @@ -47,11 +47,11 @@ const char *test_jacobian_right_matmul_log(void) int expected_Ai[8] = {0, 2, 1, 3, 0, 2, 1, 3}; int expected_Ap[7] = {0, 2, 4, 4, 4, 6, 8}; - mu_assert("vals fail", cmp_double_array(log_x_A->jacobian->x, expected_Ax, 8)); - mu_assert("cols fail", cmp_int_array(log_x_A->jacobian->i, expected_Ai, 8)); - mu_assert("rows fail", cmp_int_array(log_x_A->jacobian->p, expected_Ap, 7)); + mu_assert("vals fail", cmp_values(log_x_A->jacobian, expected_Ax, 8)); + mu_assert("sparsity fail", + cmp_sparsity(log_x_A->jacobian, expected_Ap, expected_Ai, 6, 8)); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(log_x_A); return 0; } @@ -66,8 +66,8 @@ const char *test_jacobian_right_matmul_log_vector(void) double x_vals[3] = {1.0, 2.0, 3.0}; expr *x = new_variable(1, 3, 0, 3); - /* Create sparse matrix A in CSR format (3x2) */ - CSR_Matrix *A = new_csr_matrix(3, 2, 4); + /* Create sparse matrix A in CSR_matrix format (3x2) */ + CSR_matrix *A = new_CSR_matrix(3, 2, 4); int A_p[4] = {0, 1, 3, 4}; int A_i[4] = {0, 0, 1, 1}; double A_x[4] = {1.0, 2.0, 3.0, 4.0}; @@ -92,11 +92,11 @@ const char *test_jacobian_right_matmul_log_vector(void) int expected_Ai[4] = {0, 1, 1, 2}; int expected_Ap[3] = {0, 2, 4}; - mu_assert("vals fail", cmp_double_array(log_x_A->jacobian->x, expected_Ax, 4)); - mu_assert("cols fail", cmp_int_array(log_x_A->jacobian->i, expected_Ai, 4)); - mu_assert("rows fail", cmp_int_array(log_x_A->jacobian->p, expected_Ap, 3)); + mu_assert("vals fail", cmp_values(log_x_A->jacobian, expected_Ax, 4)); + mu_assert("sparsity fail", + cmp_sparsity(log_x_A->jacobian, expected_Ap, expected_Ai, 2, 4)); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(log_x_A); return 0; } diff --git 
a/tests/jacobian_tests/affine/test_scalar_mult.h b/tests/jacobian_tests/affine/test_scalar_mult.h index a16a6fa..afdcdd4 100644 --- a/tests/jacobian_tests/affine/test_scalar_mult.h +++ b/tests/jacobian_tests/affine/test_scalar_mult.h @@ -36,12 +36,9 @@ const char *test_jacobian_scalar_mult_log_vector(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("scalar mult log: x values fail", - cmp_double_array(y->jacobian->x, expected_x, 3)); - mu_assert("scalar mult log: row pointers fail", - cmp_int_array(y->jacobian->p, expected_p, 4)); - mu_assert("scalar mult log: column indices fail", - cmp_int_array(y->jacobian->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(y->jacobian, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(y->jacobian, expected_p, expected_i, 3, 3)); free_expr(y); return 0; @@ -73,12 +70,9 @@ const char *test_jacobian_scalar_mult_log_matrix(void) int expected_p[5] = {0, 1, 2, 3, 4}; int expected_i[4] = {0, 1, 2, 3}; - mu_assert("scalar mult log matrix: x values fail", - cmp_double_array(y->jacobian->x, expected_x, 4)); - mu_assert("scalar mult log matrix: row pointers fail", - cmp_int_array(y->jacobian->p, expected_p, 5)); - mu_assert("scalar mult log matrix: column indices fail", - cmp_int_array(y->jacobian->i, expected_i, 4)); + mu_assert("vals fail", cmp_values(y->jacobian, expected_x, 4)); + mu_assert("sparsity fail", + cmp_sparsity(y->jacobian, expected_p, expected_i, 4, 4)); free_expr(y); return 0; diff --git a/tests/jacobian_tests/affine/test_sum.h b/tests/jacobian_tests/affine/test_sum.h index f34ee91..150fe53 100644 --- a/tests/jacobian_tests/affine/test_sum.h +++ b/tests/jacobian_tests/affine/test_sum.h @@ -25,9 +25,9 @@ const char *test_jacobian_sum_log(void) int expected_Ap[2] = {0, 3}; int expected_Ai[3] = {2, 3, 4}; - mu_assert("vals fail", cmp_double_array(sum_node->jacobian->x, expected_Ax, 3)); - mu_assert("rows fail", cmp_int_array(sum_node->jacobian->p, expected_Ap, 2)); - 
mu_assert("cols fail", cmp_int_array(sum_node->jacobian->i, expected_Ai, 3)); + mu_assert("vals fail", cmp_values(sum_node->jacobian, expected_Ax, 3)); + mu_assert("sparsity fail", + cmp_sparsity(sum_node->jacobian, expected_Ap, expected_Ai, 1, 3)); free_expr(sum_node); return 0; @@ -59,9 +59,9 @@ const char *test_jacobian_sum_mult(void) int expected_Ap[2] = {0, 6}; /* 1x10 matrix: row 0 spans all 6 nonzeros */ int expected_Ai[6] = {2, 3, 4, 6, 7, 8}; /* column indices */ - mu_assert("vals fail", cmp_double_array(sum_node->jacobian->x, expected_Ax, 6)); - mu_assert("rows fail", cmp_int_array(sum_node->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", cmp_int_array(sum_node->jacobian->i, expected_Ai, 6)); + mu_assert("vals fail", cmp_values(sum_node->jacobian, expected_Ax, 6)); + mu_assert("sparsity fail", + cmp_sparsity(sum_node->jacobian, expected_Ap, expected_Ai, 1, 6)); free_expr(sum_node); return 0; @@ -99,9 +99,9 @@ const char *test_jacobian_sum_log_axis_0(void) int expected_Ap[3] = {0, 3, 6}; int expected_Ai[6] = {2, 3, 4, 5, 6, 7}; /* column indices */ - mu_assert("vals fail", cmp_double_array(sum_node->jacobian->x, expected_Ax, 6)); - mu_assert("rows fail", cmp_int_array(sum_node->jacobian->p, expected_Ap, 3)); - mu_assert("cols fail", cmp_int_array(sum_node->jacobian->i, expected_Ai, 6)); + mu_assert("vals fail", cmp_values(sum_node->jacobian, expected_Ax, 6)); + mu_assert("sparsity fail", + cmp_sparsity(sum_node->jacobian, expected_Ap, expected_Ai, 2, 6)); free_expr(sum_node); return 0; @@ -146,9 +146,9 @@ const char *test_jacobian_sum_add_log_axis_0(void) int expected_Ai[12] = {2, 3, 4, 8, 9, 10, /* row 0 columns */ 5, 6, 7, 11, 12, 13}; /* row 1 columns */ - mu_assert("vals fail", cmp_double_array(sum_node->jacobian->x, expected_Ax, 12)); - mu_assert("rows fail", cmp_int_array(sum_node->jacobian->p, expected_Ap, 3)); - mu_assert("cols fail", cmp_int_array(sum_node->jacobian->i, expected_Ai, 12)); + mu_assert("vals fail", 
cmp_values(sum_node->jacobian, expected_Ax, 12)); + mu_assert("sparsity fail", + cmp_sparsity(sum_node->jacobian, expected_Ap, expected_Ai, 2, 12)); free_expr(sum_node); return 0; @@ -188,9 +188,9 @@ const char *test_jacobian_sum_log_axis_1(void) int expected_Ap[4] = {0, 2, 4, 6}; int expected_Ai[6] = {2, 5, 3, 6, 4, 7}; /* column indices */ - mu_assert("vals fail", cmp_double_array(sum_node->jacobian->x, expected_Ax, 6)); - mu_assert("rows fail", cmp_int_array(sum_node->jacobian->p, expected_Ap, 4)); - mu_assert("cols fail", cmp_int_array(sum_node->jacobian->i, expected_Ai, 6)); + mu_assert("vals fail", cmp_values(sum_node->jacobian, expected_Ax, 6)); + mu_assert("sparsity fail", + cmp_sparsity(sum_node->jacobian, expected_Ap, expected_Ai, 3, 6)); free_expr(sum_node); return 0; diff --git a/tests/jacobian_tests/affine/test_trace.h b/tests/jacobian_tests/affine/test_trace.h index 61701bd..808d634 100644 --- a/tests/jacobian_tests/affine/test_trace.h +++ b/tests/jacobian_tests/affine/test_trace.h @@ -39,10 +39,9 @@ const char *test_jacobian_trace_variable(void) int expected_Ap[2] = {0, 3}; int expected_Ai[3] = {1, 5, 9}; /* column indices (global variable indices) */ - mu_assert("vals fail", - cmp_double_array(trace_node->jacobian->x, expected_Ax, 3)); - mu_assert("rows fail", cmp_int_array(trace_node->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", cmp_int_array(trace_node->jacobian->i, expected_Ai, 3)); + mu_assert("vals fail", cmp_values(trace_node->jacobian, expected_Ax, 3)); + mu_assert("sparsity fail", + cmp_sparsity(trace_node->jacobian, expected_Ap, expected_Ai, 1, 3)); free_expr(trace_node); return 0; @@ -94,10 +93,9 @@ const char *test_jacobian_trace_composite(void) int expected_Ai[3] = {1, 5, 9}; /* column indices (global variable indices) */ mu_assert("vals match count", trace_node->jacobian->nnz == 3); - mu_assert("rows fail", cmp_int_array(trace_node->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", cmp_int_array(trace_node->jacobian->i, 
expected_Ai, 3)); - mu_assert("vals fail", - cmp_double_array(trace_node->jacobian->x, expected_Ax, 3)); + mu_assert("sparsity fail", + cmp_sparsity(trace_node->jacobian, expected_Ap, expected_Ai, 1, 3)); + mu_assert("vals fail", cmp_values(trace_node->jacobian, expected_Ax, 3)); free_expr(trace_node); return 0; diff --git a/tests/jacobian_tests/affine/test_transpose.h b/tests/jacobian_tests/affine/test_transpose.h index a02e22d..3302545 100644 --- a/tests/jacobian_tests/affine/test_transpose.h +++ b/tests/jacobian_tests/affine/test_transpose.h @@ -5,13 +5,14 @@ #include "atoms/affine.h" #include "minunit.h" #include "test_helpers.h" +#include "utils/permuted_dense.h" #include #include const char *test_jacobian_transpose(void) { // A = [1 2; 3 4] - CSR_Matrix *A = new_csr_matrix(2, 2, 4); + CSR_matrix *A = new_CSR_matrix(2, 2, 4); int A_p[3] = {0, 2, 4}; int A_i[4] = {0, 1, 0, 1}; double A_x[4] = {1, 2, 3, 4}; @@ -33,14 +34,59 @@ const char *test_jacobian_transpose(void) int expected_p[5] = {0, 2, 4, 6, 8}; int expected_i[8] = {0, 1, 2, 3, 0, 1, 2, 3}; - mu_assert("jacobian values fail", - cmp_double_array(transpose_AX->jacobian->x, expected_x, 8)); - mu_assert("jacobian row ptr fail", - cmp_int_array(transpose_AX->jacobian->p, expected_p, 5)); - mu_assert("jacobian col idx fail", - cmp_int_array(transpose_AX->jacobian->i, expected_i, 8)); + mu_assert("vals fail", cmp_values(transpose_AX->jacobian, expected_x, 8)); + mu_assert("sparsity fail", + cmp_sparsity(transpose_AX->jacobian, expected_p, expected_i, 4, 8)); free_expr(transpose_AX); - free_csr_matrix(A); + free_CSR_matrix(A); + return 0; +} + +/* When the child of transpose has a PD Jacobian, the output should also be PD + with the same col_perm and a permuted row_perm. Setup: + u : 2x1 column variable, n_vars = 2. + AU = left_matmul_dense(A, u) with A a 6x2 dense matrix => AU is 6x1. + PD Jacobian: global (6, 2), m0=6, n0=2, row_perm=[0..5], col_perm=[0,1]. + R = reshape(AU, 3, 2). copy_sparsity preserves PD. 
+ T = transpose(R) with d1=2, d2=3. k(r) = (r/2) + (r%2)*3 = [0,3,1,4,2,5]. + All r are active, so output row_perm stays [0..5] and the dense + block X is row-permuted: X_out[i, :] = X_c[k(i), :]. */ +const char *test_jacobian_transpose_pd_preserved(void) +{ + double A_data[12] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, + 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + expr *u = new_variable(2, 1, 0, 2); + expr *AU = new_left_matmul_dense(NULL, u, 6, 2, A_data); + expr *R = new_reshape(AU, 3, 2); + expr *T = new_transpose(R); + + double u_vals[2] = {0.5, -1.5}; + T->forward(T, u_vals); + jacobian_init(T); + T->eval_jacobian(T); + + /* Structural: output Jacobian must be a PD. */ + mu_assert("transpose Jacobian should be PD", T->jacobian->is_permuted_dense); + permuted_dense *pd_T = (permuted_dense *) T->jacobian; + mu_assert("global m", T->jacobian->m == 6); + mu_assert("global n", T->jacobian->n == 2); + mu_assert("m0", pd_T->m0 == 6); + mu_assert("n0", pd_T->n0 == 2); + int expected_row_perm[6] = {0, 1, 2, 3, 4, 5}; + int expected_col_perm[2] = {0, 1}; + mu_assert("row_perm", cmp_int_array(pd_T->row_perm, expected_row_perm, 6)); + mu_assert("col_perm", cmp_int_array(pd_T->col_perm, expected_col_perm, 2)); + + /* Numerical: X_out rows = A rows permuted by k(r) = [0,3,1,4,2,5]. 
*/ + double expected_X[12] = {1.0, 2.0, /* row 0 from A row 0 */ + 7.0, 8.0, /* row 1 from A row 3 */ + 3.0, 4.0, /* row 2 from A row 1 */ + 9.0, 10.0, /* row 3 from A row 4 */ + 5.0, 6.0, /* row 4 from A row 2 */ + 11.0, 12.0}; /* row 5 from A row 5 */ + mu_assert("X values", cmp_double_array(pd_T->X, expected_X, 12)); + + free_expr(T); return 0; } diff --git a/tests/jacobian_tests/affine/test_upper_tri.h b/tests/jacobian_tests/affine/test_upper_tri.h index f0bffa1..82d7419 100644 --- a/tests/jacobian_tests/affine/test_upper_tri.h +++ b/tests/jacobian_tests/affine/test_upper_tri.h @@ -10,7 +10,7 @@ const char *test_upper_tri_jacobian_variable(void) { /* upper_tri of a 4x4 variable (16 vars total) * Row-major upper tri indices: [4, 8, 12, 9, 13, 14] - * Jacobian is 6x16 CSR: row k has a single 1.0 at col indices[k] */ + * Jacobian is 6x16 CSR_matrix: row k has a single 1.0 at col indices[k] */ double u[16]; for (int k = 0; k < 16; k++) { @@ -27,10 +27,9 @@ const char *test_upper_tri_jacobian_variable(void) int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; int expected_i[6] = {4, 8, 12, 9, 13, 14}; - mu_assert("upper_tri jac vals", - cmp_double_array(ut->jacobian->x, expected_x, 6)); - mu_assert("upper_tri jac p", cmp_int_array(ut->jacobian->p, expected_p, 7)); - mu_assert("upper_tri jac i", cmp_int_array(ut->jacobian->i, expected_i, 6)); + mu_assert("vals fail", cmp_values(ut->jacobian, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(ut->jacobian, expected_p, expected_i, 6, 6)); free_expr(ut); return 0; @@ -57,12 +56,12 @@ const char *test_upper_tri_jacobian_of_log(void) ut->eval_jacobian(ut); double expected_x[6] = {0.2, 1.0 / 9.0, 1.0 / 13.0, 0.1, 1.0 / 14.0, 1.0 / 15.0}; + int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; int expected_i[6] = {4, 8, 12, 9, 13, 14}; - mu_assert("upper_tri log jac vals", - cmp_double_array(ut->jacobian->x, expected_x, 6)); - mu_assert("upper_tri log jac cols", - cmp_int_array(ut->jacobian->i, expected_i, 6)); + mu_assert("vals 
fail", cmp_values(ut->jacobian, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(ut->jacobian, expected_p, expected_i, 6, 6)); free_expr(ut); return 0; diff --git a/tests/jacobian_tests/affine/test_vector_mult.h b/tests/jacobian_tests/affine/test_vector_mult.h index cd6fa1a..8196f9e 100644 --- a/tests/jacobian_tests/affine/test_vector_mult.h +++ b/tests/jacobian_tests/affine/test_vector_mult.h @@ -35,12 +35,9 @@ const char *test_jacobian_vector_mult_log_vector(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vector mult log: x values fail", - cmp_double_array(y->jacobian->x, expected_x, 3)); - mu_assert("vector mult log: row pointers fail", - cmp_int_array(y->jacobian->p, expected_p, 4)); - mu_assert("vector mult log: column indices fail", - cmp_int_array(y->jacobian->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(y->jacobian, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(y->jacobian, expected_p, expected_i, 3, 3)); free_expr(y); return 0; @@ -71,12 +68,9 @@ const char *test_jacobian_vector_mult_log_matrix(void) int expected_p[5] = {0, 1, 2, 3, 4}; int expected_i[4] = {0, 1, 2, 3}; - mu_assert("vector mult log matrix: x values fail", - cmp_double_array(y->jacobian->x, expected_x, 4)); - mu_assert("vector mult log matrix: row pointers fail", - cmp_int_array(y->jacobian->p, expected_p, 5)); - mu_assert("vector mult log matrix: column indices fail", - cmp_int_array(y->jacobian->i, expected_i, 4)); + mu_assert("vals fail", cmp_values(y->jacobian, expected_x, 4)); + mu_assert("sparsity fail", + cmp_sparsity(y->jacobian, expected_p, expected_i, 4, 4)); free_expr(y); return 0; diff --git a/tests/jacobian_tests/affine/test_vstack.h b/tests/jacobian_tests/affine/test_vstack.h index fc4229f..86f4ac8 100644 --- a/tests/jacobian_tests/affine/test_vstack.h +++ b/tests/jacobian_tests/affine/test_vstack.h @@ -38,12 +38,9 @@ const char *test_jacobian_vstack_vectors(void) int expected_i[6] = {0, 1, 2, 0, 1, 2}; 
int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; - mu_assert("vstack jac vectors: vals", - cmp_double_array(stack->jacobian->x, expected_x, 6)); - mu_assert("vstack jac vectors: cols", - cmp_int_array(stack->jacobian->i, expected_i, 6)); - mu_assert("vstack jac vectors: rows", - cmp_int_array(stack->jacobian->p, expected_p, 7)); + mu_assert("vals fail", cmp_values(stack->jacobian, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(stack->jacobian, expected_p, expected_i, 6, 6)); free_expr(stack); return 0; @@ -88,12 +85,9 @@ const char *test_jacobian_vstack_matrix(void) int expected_i[9] = {0, 1, 6, 2, 3, 7, 4, 5, 8}; int expected_p[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - mu_assert("vstack jac matrix: vals", - cmp_double_array(stack->jacobian->x, expected_x, 9)); - mu_assert("vstack jac matrix: cols", - cmp_int_array(stack->jacobian->i, expected_i, 9)); - mu_assert("vstack jac matrix: rows", - cmp_int_array(stack->jacobian->p, expected_p, 10)); + mu_assert("vals fail", cmp_values(stack->jacobian, expected_x, 9)); + mu_assert("sparsity fail", + cmp_sparsity(stack->jacobian, expected_p, expected_i, 9, 9)); free_expr(stack); return 0; diff --git a/tests/jacobian_tests/bivariate_full_dom/test_elementwise_mult.h b/tests/jacobian_tests/bivariate_full_dom/test_elementwise_mult.h index 9e7e6e5..08d50e4 100644 --- a/tests/jacobian_tests/bivariate_full_dom/test_elementwise_mult.h +++ b/tests/jacobian_tests/bivariate_full_dom/test_elementwise_mult.h @@ -24,9 +24,8 @@ const char *test_jacobian_elementwise_mult_1(void) int rows[4] = {0, 2, 4, 6}; int cols[6] = {2, 7, 3, 8, 4, 9}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, vals, 6)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, rows, 4)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, cols, 6)); + mu_assert("vals fail", cmp_values(node->jacobian, vals, 6)); + mu_assert("sparsity fail", cmp_sparsity(node->jacobian, rows, cols, 3, 6)); free_expr(node); return 0; } @@ -49,9 +48,8 @@ 
const char *test_jacobian_elementwise_mult_2(void) int rows[4] = {0, 2, 4, 6}; int cols[6] = {2, 7, 3, 8, 4, 9}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, vals, 6)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, rows, 4)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, cols, 6)); + mu_assert("vals fail", cmp_values(node->jacobian, vals, 6)); + mu_assert("sparsity fail", cmp_sparsity(node->jacobian, rows, cols, 3, 6)); free_expr(node); return 0; } @@ -66,7 +64,7 @@ const char *test_jacobian_elementwise_mult_3(void) 0 0 1 1 3 0 0 0 0 0 0 0 1 -1 1 0 0 0 0 0] */ - CSR_Matrix *A = new_csr_matrix(3, 10, 9); + CSR_matrix *A = new_CSR_matrix(3, 10, 9); double Ax_vals[9] = {1.0, 2.0, 1.0, 1.0, 3.0, 1.0, -1.0, 1.0}; int Ai[9] = {2, 3, 2, 3, 4, 2, 3, 4}; int Ap[4] = {0, 2, 5, 8}; @@ -80,7 +78,7 @@ const char *test_jacobian_elementwise_mult_3(void) 0 0 0 0 0 0 0 1 -2 1] */ - CSR_Matrix *B = new_csr_matrix(3, 10, 9); + CSR_matrix *B = new_CSR_matrix(3, 10, 9); double Bx_vals[9] = {1.0, 3.0, 1.0, 1.0, 4.0, 1.0, -2.0, 1.0}; int Bi[9] = {7, 8, 7, 8, 9, 7, 8, 9}; int Bp[4] = {0, 2, 5, 8}; @@ -112,12 +110,11 @@ const char *test_jacobian_elementwise_mult_3(void) int rows[4] = {0, 4, 10, 16}; int cols[16] = {2, 3, 7, 8, 2, 3, 4, 7, 8, 9, 2, 3, 4, 7, 8, 9}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, vals, 16)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, rows, 4)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, cols, 16)); + mu_assert("vals fail", cmp_values(node->jacobian, vals, 16)); + mu_assert("sparsity fail", cmp_sparsity(node->jacobian, rows, cols, 3, 16)); free_expr(node); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } @@ -131,7 +128,7 @@ const char *test_jacobian_elementwise_mult_4(void) 0 0 1 1 3 0 0 0 0 0 0 0 1 -1 1 0 0 0 0 0] */ - CSR_Matrix *A = new_csr_matrix(3, 10, 9); + CSR_matrix *A = new_CSR_matrix(3, 10, 9); double Ax_vals[9] = 
{1.0, 2.0, 1.0, 1.0, 3.0, 1.0, -1.0, 1.0}; int Ai[9] = {2, 3, 2, 3, 4, 2, 3, 4}; int Ap[4] = {0, 2, 5, 8}; @@ -157,10 +154,9 @@ const char *test_jacobian_elementwise_mult_4(void) int rows[4] = {0, 2, 5, 8}; int cols[8] = {2, 3, 2, 3, 4, 2, 3, 4}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, vals, 8)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, rows, 4)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, cols, 8)); + mu_assert("vals fail", cmp_values(node->jacobian, vals, 8)); + mu_assert("sparsity fail", cmp_sparsity(node->jacobian, rows, cols, 3, 8)); free_expr(node); - free_csr_matrix(A); + free_CSR_matrix(A); return 0; } diff --git a/tests/jacobian_tests/bivariate_full_dom/test_matmul.h b/tests/jacobian_tests/bivariate_full_dom/test_matmul.h index 386efa1..6155252 100644 --- a/tests/jacobian_tests/bivariate_full_dom/test_matmul.h +++ b/tests/jacobian_tests/bivariate_full_dom/test_matmul.h @@ -49,15 +49,10 @@ const char *test_jacobian_matmul(void) Z->eval_jacobian(Z); /* Verify sparsity pattern */ - mu_assert("Jacobian should have 8 rows", Z->jacobian->m == z_size); mu_assert("Jacobian should have 18 columns", Z->jacobian->n == n_vars); - mu_assert("Jacobian should have 48 nonzeros", Z->jacobian->nnz == 48); - /* Check row pointers: each row should have 6 entries */ + /* Each row should have 6 entries. 
*/ int expected_p[9] = {0, 6, 12, 18, 24, 30, 36, 42, 48}; - mu_assert("Row pointers incorrect", - cmp_int_array(Z->jacobian->p, expected_p, 9)); - int expected_i[48] = {0, 2, 4, 6, 7, 8, /* row 0 */ 1, 3, 5, 6, 7, 8, /* row 1 */ 0, 2, 4, 9, 10, 11, /* row 2 */ @@ -66,8 +61,8 @@ const char *test_jacobian_matmul(void) 1, 3, 5, 12, 13, 14, /* row 5 */ 0, 2, 4, 15, 16, 17, /* row 6 */ 1, 3, 5, 15, 16, 17}; /* row 7 */ - mu_assert("Column indices incorrect", - cmp_int_array(Z->jacobian->i, expected_i, 48)); + mu_assert("sparsity fail", + cmp_sparsity(Z->jacobian, expected_p, expected_i, z_size, 48)); /* Verify Jacobian values row-wise: for each row, values are [Y^T row for the column, X row values] since X has lower var_id */ @@ -81,8 +76,7 @@ const char *test_jacobian_matmul(void) /* row 6 (col 3) */ 16.0, 17.0, 18.0, 1.0, 3.0, 5.0, /* row 7 (col 3) */ 16.0, 17.0, 18.0, 2.0, 4.0, 6.0}; - mu_assert("Jacobian values incorrect", - cmp_double_array(Z->jacobian->x, expected_x, 48)); + mu_assert("vals fail", cmp_values(Z->jacobian, expected_x, 48)); free_expr(Z); return 0; diff --git a/tests/jacobian_tests/bivariate_restricted_dom/test_quad_over_lin.h b/tests/jacobian_tests/bivariate_restricted_dom/test_quad_over_lin.h index e1007f8..1a44469 100644 --- a/tests/jacobian_tests/bivariate_restricted_dom/test_quad_over_lin.h +++ b/tests/jacobian_tests/bivariate_restricted_dom/test_quad_over_lin.h @@ -25,9 +25,9 @@ const char *test_quad_over_lin1(void) int expected_Ap[2] = {0, 4}; int expected_Ai[4] = {2, 3, 4, 7}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_Ax, 4)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, expected_Ai, 4)); + mu_assert("vals fail", cmp_values(node->jacobian, expected_Ax, 4)); + mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 1, 4)); free_expr(node); return 0; } @@ -49,9 +49,9 @@ const char 
*test_quad_over_lin2(void) int expected_Ap[2] = {0, 4}; int expected_Ai[4] = {2, 5, 6, 7}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_Ax, 4)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, expected_Ai, 4)); + mu_assert("vals fail", cmp_values(node->jacobian, expected_Ax, 4)); + mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 1, 4)); free_expr(node); return 0; } @@ -63,7 +63,7 @@ const char *test_quad_over_lin3(void) // A = [0 0 1 2 3 0 0 0 // 0 0 4 5 6 0 0] - CSR_Matrix *A = new_csr_matrix(2, 8, 6); + CSR_matrix *A = new_CSR_matrix(2, 8, 6); double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; int Ai[6] = {2, 3, 4, 2, 3, 4}; int Ap[3] = {0, 3, 6}; @@ -86,11 +86,11 @@ const char *test_quad_over_lin3(void) int expected_Ap[2] = {0, 4}; int expected_Ai[4] = {2, 3, 4, 7}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_vals, 4)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, expected_Ai, 4)); + mu_assert("vals fail", cmp_values(node->jacobian, expected_vals, 4)); + mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 1, 4)); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(node); return 0; } @@ -103,7 +103,7 @@ const char *test_quad_over_lin4(void) // 0 0 0 0 0 4 5 6 // - CSR_Matrix *A = new_csr_matrix(2, 8, 6); + CSR_matrix *A = new_CSR_matrix(2, 8, 6); double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; int Ai[6] = {5, 6, 7, 5, 6, 7}; int Ap[3] = {0, 3, 6}; @@ -126,11 +126,11 @@ const char *test_quad_over_lin4(void) int expected_Ap[2] = {0, 4}; int expected_Ai[4] = {2, 5, 6, 7}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_vals, 4)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", 
cmp_int_array(node->jacobian->i, expected_Ai, 4)); + mu_assert("vals fail", cmp_values(node->jacobian, expected_vals, 4)); + mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 1, 4)); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(node); return 0; } @@ -143,7 +143,7 @@ const char *test_quad_over_lin5(void) // 0 2 0 0 0 4 5 6 // - CSR_Matrix *A = new_csr_matrix(2, 8, 9); + CSR_matrix *A = new_CSR_matrix(2, 8, 9); double Ax[9] = {1, 3, 1.0, 2.0, 3.0, 2, 4.0, 5.0, 6.0}; int Ai[9] = {0, 3, 5, 6, 7, 1, 5, 6, 7}; int Ap[3] = {0, 5, 9}; @@ -166,11 +166,11 @@ const char *test_quad_over_lin5(void) int expected_Ap[2] = {0, 7}; int expected_Ai[7] = {0, 1, 2, 3, 5, 6, 7}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_vals, 7)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, expected_Ai, 7)); + mu_assert("vals fail", cmp_values(node->jacobian, expected_vals, 7)); + mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 1, 7)); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(node); return 0; } diff --git a/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr.h b/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr.h index 883e233..e9998fa 100644 --- a/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr.h +++ b/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr.h @@ -30,9 +30,9 @@ const char *test_jacobian_rel_entr_vector_args_1(void) int expected_Ap[4] = {0, 2, 4, 6}; int expected_Ai[6] = {2, 7, 3, 8, 4, 9}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_Ax, 6)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 4)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, expected_Ai, 6)); + mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 3, 6)); + mu_assert("vals fail", 
cmp_values(node->jacobian, expected_Ax, 6)); free_expr(node); return 0; } @@ -61,9 +61,9 @@ const char *test_jacobian_rel_entr_vector_args_2(void) int expected_Ap[4] = {0, 2, 4, 6}; int expected_Ai[6] = {2, 7, 3, 8, 4, 9}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_Ax, 6)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 4)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, expected_Ai, 6)); + mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 3, 6)); + mu_assert("vals fail", cmp_values(node->jacobian, expected_Ax, 6)); free_expr(node); return 0; } @@ -103,9 +103,9 @@ const char *test_jacobian_rel_entr_matrix_args(void) int expected_Ap[7] = {0, 2, 4, 6, 8, 10, 12}; int expected_Ai[12] = {0, 6, 1, 7, 2, 8, 3, 9, 4, 10, 5, 11}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_Ax, 12)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 7)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, expected_Ai, 12)); + mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 6, 12)); + mu_assert("vals fail", cmp_values(node->jacobian, expected_Ax, 12)); free_expr(node); return 0; diff --git a/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr_scalar_vector.h b/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr_scalar_vector.h index 20cd6fa..5236d3e 100644 --- a/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr_scalar_vector.h +++ b/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr_scalar_vector.h @@ -28,9 +28,9 @@ const char *test_jacobian_rel_entr_scalar_vector(void) int expected_Ap[4] = {0, 2, 4, 6}; int expected_Ai[6] = {0, 1, 0, 2, 0, 3}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_Ax, 6)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 4)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, expected_Ai, 6)); + 
mu_assert("vals fail", cmp_values(node->jacobian, expected_Ax, 6)); + mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 3, 6)); free_expr(node); return 0; } diff --git a/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr_vector_scalar.h b/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr_vector_scalar.h index 9a454dd..d2f594f 100644 --- a/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr_vector_scalar.h +++ b/tests/jacobian_tests/bivariate_restricted_dom/test_rel_entr_vector_scalar.h @@ -28,9 +28,9 @@ const char *test_jacobian_rel_entr_vector_scalar(void) int expected_Ap[4] = {0, 2, 4, 6}; int expected_Ai[6] = {0, 3, 1, 3, 2, 3}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_Ax, 6)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 4)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, expected_Ai, 6)); + mu_assert("vals fail", cmp_values(node->jacobian, expected_Ax, 6)); + mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 3, 6)); free_expr(node); return 0; } diff --git a/tests/jacobian_tests/composite/test_chain_rule_jacobian.h b/tests/jacobian_tests/composite/test_chain_rule_jacobian.h index bb7a23c..1fd6a65 100644 --- a/tests/jacobian_tests/composite/test_chain_rule_jacobian.h +++ b/tests/jacobian_tests/composite/test_chain_rule_jacobian.h @@ -6,7 +6,7 @@ #include "minunit.h" #include "numerical_diff.h" #include "test_helpers.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" const char *test_jacobian_exp_sum(void) { @@ -79,8 +79,8 @@ const char *test_jacobian_Ax_Bx_multiply(void) /* the first and last values are not used, but good to include them in test */ double u_vals[4] = {1.0, 2.0, 3.0, 4.0}; - CSR_Matrix *A = new_csr_random(2, 2, 1.0); - CSR_Matrix *B = new_csr_random(2, 2, 1.0); + CSR_matrix *A = new_csr_random(2, 2, 1.0); + CSR_matrix *B = new_csr_random(2, 2, 1.0); expr *x = new_variable(2, 
1, 1, 4); expr *Ax = new_left_matmul(NULL, x, A); expr *Bx = new_left_matmul(NULL, x, B); @@ -90,8 +90,8 @@ const char *test_jacobian_Ax_Bx_multiply(void) check_jacobian_num(multiply, u_vals, NUMERICAL_DIFF_DEFAULT_H)); free_expr(multiply); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } @@ -99,8 +99,8 @@ const char *test_jacobian_AX_BX_multiply(void) { double u_vals[4] = {1.0, 2.0, 3.0, 4.0}; - CSR_Matrix *A = new_csr_random(2, 2, 1.0); - CSR_Matrix *B = new_csr_random(2, 2, 1.0); + CSR_matrix *A = new_csr_random(2, 2, 1.0); + CSR_matrix *B = new_csr_random(2, 2, 1.0); expr *X = new_variable(2, 2, 0, 4); expr *AX = new_left_matmul(NULL, X, A); expr *BX = new_left_matmul(NULL, X, B); @@ -110,8 +110,8 @@ const char *test_jacobian_AX_BX_multiply(void) check_jacobian_num(multiply, u_vals, NUMERICAL_DIFF_DEFAULT_H)); free_expr(multiply); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } @@ -120,10 +120,10 @@ const char *test_jacobian_quad_form_Ax(void) /* (Ax)^T Q (Ax) where Q is symmetric */ double u_vals[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - CSR_Matrix *A = new_csr_random(3, 4, 1.0); + CSR_matrix *A = new_csr_random(3, 4, 1.0); /* Q = [1 2 0; 2 3 0; 0 0 4] */ - CSR_Matrix *Q = new_csr_matrix(3, 3, 5); + CSR_matrix *Q = new_CSR_matrix(3, 3, 5); double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; int Qi[5] = {0, 1, 0, 1, 2}; int Qp[4] = {0, 2, 4, 5}; @@ -140,8 +140,8 @@ const char *test_jacobian_quad_form_Ax(void) check_jacobian_num(node, u_vals, NUMERICAL_DIFF_DEFAULT_H)); free_expr(node); - free_csr_matrix(A); - free_csr_matrix(Q); + free_CSR_matrix(A); + free_CSR_matrix(Q); return 0; } @@ -151,7 +151,7 @@ const char *test_jacobian_quad_form_exp(void) double u_vals[3] = {0.5, 1.0, 1.5}; /* Q = [1 2 0; 2 3 0; 0 0 4] */ - CSR_Matrix *Q = new_csr_matrix(3, 3, 5); + CSR_matrix *Q = new_CSR_matrix(3, 3, 5); double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; int Qi[5] = {0, 1, 0, 1, 2}; int 
Qp[4] = {0, 2, 4, 5}; @@ -167,7 +167,7 @@ const char *test_jacobian_quad_form_exp(void) check_jacobian_num(node, u_vals, NUMERICAL_DIFF_DEFAULT_H)); free_expr(node); - free_csr_matrix(Q); + free_CSR_matrix(Q); return 0; } @@ -212,8 +212,8 @@ const char *test_jacobian_matmul_Ax_By(void) /* Z = (A @ X) @ (B @ Y) with constant matrices A, B */ double u_vals[10] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; - CSR_Matrix *A = new_csr_random(3, 2, 1.0); - CSR_Matrix *B = new_csr_random(2, 3, 1.0); + CSR_matrix *A = new_csr_random(3, 2, 1.0); + CSR_matrix *B = new_csr_random(2, 3, 1.0); expr *X = new_variable(2, 2, 0, 10); /* 2x2, vars 0-3 */ expr *Y = new_variable(3, 2, 4, 10); /* 3x2, vars 4-9 */ @@ -225,8 +225,8 @@ const char *test_jacobian_matmul_Ax_By(void) check_jacobian_num(Z, u_vals, NUMERICAL_DIFF_DEFAULT_H)); free_expr(Z); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } @@ -235,8 +235,8 @@ const char *test_jacobian_matmul_sin_Ax_cos_Bx(void) /* Z = sin(A @ X) @ cos(B @ X), shared variable X */ double u_vals[6] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0}; - CSR_Matrix *A = new_csr_random(2, 3, 1.0); - CSR_Matrix *B = new_csr_random(2, 3, 1.0); + CSR_matrix *A = new_csr_random(2, 3, 1.0); + CSR_matrix *B = new_csr_random(2, 3, 1.0); expr *X = new_variable(3, 2, 0, 6); /* 3x2, vars 0-5 */ expr *AX = new_left_matmul(NULL, X, A); /* 2x2 */ @@ -249,8 +249,8 @@ const char *test_jacobian_matmul_sin_Ax_cos_Bx(void) check_jacobian_num(Z, u_vals, NUMERICAL_DIFF_DEFAULT_H)); free_expr(Z); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } diff --git a/tests/jacobian_tests/composite/test_composite_exp.h b/tests/jacobian_tests/composite/test_composite_exp.h index 32671d1..73a2e95 100644 --- a/tests/jacobian_tests/composite/test_composite_exp.h +++ b/tests/jacobian_tests/composite/test_composite_exp.h @@ -11,7 +11,7 @@ const char *test_jacobian_composite_exp(void) { double 
u_vals[6] = {0, 0, 1, 2, 3, 0}; - CSR_Matrix *A = new_csr_matrix(2, 6, 6); + CSR_matrix *A = new_CSR_matrix(2, 6, 6); double Ax[6] = {3, 2, 1, 2, 1, 1}; int Ai[6] = {2, 3, 4, 2, 3, 4}; int Ap[3] = {0, 3, 6}; @@ -33,11 +33,10 @@ const char *test_jacobian_composite_exp(void) double vals[6] = {3 * e10, 2 * e10, 1 * e10, 2 * e7, 1 * e7, 1 * e7}; int rows[3] = {0, 3, 6}; int cols[6] = {2, 3, 4, 2, 3, 4}; - mu_assert("vals fail", cmp_double_array(exp_node->jacobian->x, vals, 6)); - mu_assert("rows fail", cmp_int_array(exp_node->jacobian->p, rows, 3)); - mu_assert("cols fail", cmp_int_array(exp_node->jacobian->i, cols, 6)); + mu_assert("vals fail", cmp_values(exp_node->jacobian, vals, 6)); + mu_assert("sparsity fail", cmp_sparsity(exp_node->jacobian, rows, cols, 2, 6)); free_expr(exp_node); - free_csr_matrix(A); + free_CSR_matrix(A); return 0; } @@ -46,7 +45,7 @@ const char *test_jacobian_composite_exp_add(void) { double u_vals[7] = {0, 0, 1, 1, 1, 2, 2}; - CSR_Matrix *A = new_csr_matrix(3, 7, 9); + CSR_matrix *A = new_CSR_matrix(3, 7, 9); double Ax[9] = {1, 1, 1, 2, 2, 2, 3, 3, 3}; int Ai[9] = {2, 3, 4, 2, 3, 4, 2, 3, 4}; int Ap[4] = {0, 3, 6, 9}; @@ -54,7 +53,7 @@ const char *test_jacobian_composite_exp_add(void) memcpy(A->i, Ai, 9 * sizeof(int)); memcpy(A->p, Ap, 4 * sizeof(int)); - CSR_Matrix *B = new_csr_matrix(3, 7, 6); + CSR_matrix *B = new_CSR_matrix(3, 7, 6); double Bx[6] = {1, 1, 2, 2, 3, 3}; int Bi[6] = {5, 6, 5, 6, 5, 6}; int Bp[4] = {0, 2, 4, 6}; @@ -74,7 +73,7 @@ const char *test_jacobian_composite_exp_add(void) check_jacobian_num(sum, u_vals, NUMERICAL_DIFF_DEFAULT_H)); free_expr(sum); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } diff --git a/tests/jacobian_tests/elementwise_restricted_dom/test_log.h b/tests/jacobian_tests/elementwise_restricted_dom/test_log.h index ff99d64..b671910 100644 --- a/tests/jacobian_tests/elementwise_restricted_dom/test_log.h +++ 
b/tests/jacobian_tests/elementwise_restricted_dom/test_log.h @@ -18,9 +18,9 @@ const char *test_jacobian_log(void) log_node->forward(log_node, u_vals); jacobian_init(log_node); log_node->eval_jacobian(log_node); - mu_assert("vals fail", cmp_double_array(log_node->jacobian->x, expected_Ax, 3)); - mu_assert("rows fail", cmp_int_array(log_node->jacobian->p, expected_Ap, 4)); - mu_assert("cols fail", cmp_int_array(log_node->jacobian->i, expected_Ai, 3)); + mu_assert("vals fail", cmp_values(log_node->jacobian, expected_Ax, 3)); + mu_assert("sparsity fail", + cmp_sparsity(log_node->jacobian, expected_Ap, expected_Ai, 3, 3)); free_expr(log_node); return 0; } @@ -36,9 +36,9 @@ const char *test_jacobian_log_matrix(void) log_node->forward(log_node, u_vals); jacobian_init(log_node); log_node->eval_jacobian(log_node); - mu_assert("vals fail", cmp_double_array(log_node->jacobian->x, expected_Ax, 4)); - mu_assert("rows fail", cmp_int_array(log_node->jacobian->p, expected_Ap, 5)); - mu_assert("cols fail", cmp_int_array(log_node->jacobian->i, expected_Ai, 4)); + mu_assert("vals fail", cmp_values(log_node->jacobian, expected_Ax, 4)); + mu_assert("sparsity fail", + cmp_sparsity(log_node->jacobian, expected_Ap, expected_Ai, 4, 4)); free_expr(log_node); return 0; } diff --git a/tests/jacobian_tests/other/test_prod.h b/tests/jacobian_tests/other/test_prod.h index ef59250..7875437 100644 --- a/tests/jacobian_tests/other/test_prod.h +++ b/tests/jacobian_tests/other/test_prod.h @@ -25,9 +25,9 @@ const char *test_jacobian_prod_no_zero(void) int expected_Ap[2] = {0, 4}; int expected_Ai[4] = {2, 3, 4, 5}; - mu_assert("vals fail", cmp_double_array(p->jacobian->x, expected_Ax, 4)); - mu_assert("rows fail", cmp_int_array(p->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", cmp_int_array(p->jacobian->i, expected_Ai, 4)); + mu_assert("vals fail", cmp_values(p->jacobian, expected_Ax, 4)); + mu_assert("sparsity fail", + cmp_sparsity(p->jacobian, expected_Ap, expected_Ai, 1, 4)); free_expr(p); 
return 0; @@ -50,9 +50,9 @@ const char *test_jacobian_prod_one_zero(void) int expected_Ap[2] = {0, 4}; int expected_Ai[4] = {2, 3, 4, 5}; - mu_assert("vals fail", cmp_double_array(p->jacobian->x, expected_Ax, 4)); - mu_assert("rows fail", cmp_int_array(p->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", cmp_int_array(p->jacobian->i, expected_Ai, 4)); + mu_assert("vals fail", cmp_values(p->jacobian, expected_Ax, 4)); + mu_assert("sparsity fail", + cmp_sparsity(p->jacobian, expected_Ap, expected_Ai, 1, 4)); free_expr(p); return 0; @@ -73,9 +73,9 @@ const char *test_jacobian_prod_two_zeros(void) int expected_Ap[2] = {0, 4}; int expected_Ai[4] = {2, 3, 4, 5}; - mu_assert("vals fail", cmp_double_array(p->jacobian->x, expected_Ax, 4)); - mu_assert("rows fail", cmp_int_array(p->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", cmp_int_array(p->jacobian->i, expected_Ai, 4)); + mu_assert("vals fail", cmp_values(p->jacobian, expected_Ax, 4)); + mu_assert("sparsity fail", + cmp_sparsity(p->jacobian, expected_Ap, expected_Ai, 1, 4)); free_expr(p); return 0; diff --git a/tests/jacobian_tests/other/test_prod_axis_one.h b/tests/jacobian_tests/other/test_prod_axis_one.h index b4757ee..8310f86 100644 --- a/tests/jacobian_tests/other/test_prod_axis_one.h +++ b/tests/jacobian_tests/other/test_prod_axis_one.h @@ -37,14 +37,14 @@ const char *test_jacobian_prod_axis_one(void) jacobian_init(p); p->eval_jacobian(p); - /* CSR format for 3x10 Jacobian with row-strided structure */ + /* CSR format for 3x10 Jacobian with row-strided structure */ double expected_Ax[9] = {28.0, 7.0, 4.0, 40.0, 16.0, 10.0, 54.0, 27.0, 18.0}; int expected_Ap[4] = {0, 3, 6, 9}; int expected_Ai[9] = {1, 4, 7, 2, 5, 8, 3, 6, 9}; - mu_assert("vals fail", cmp_double_array(p->jacobian->x, expected_Ax, 9)); - mu_assert("rows fail", cmp_int_array(p->jacobian->p, expected_Ap, 4)); - mu_assert("cols fail", cmp_int_array(p->jacobian->i, expected_Ai, 9)); + mu_assert("vals fail", cmp_values(p->jacobian, 
expected_Ax, 9)); + mu_assert("sparsity fail", + cmp_sparsity(p->jacobian, expected_Ap, expected_Ai, 3, 9)); free_expr(p); return 0; @@ -80,14 +80,14 @@ const char *test_jacobian_prod_axis_one_one_zero(void) jacobian_init(p); p->eval_jacobian(p); - /* CSR format for 3x10 Jacobian with row-strided structure */ + /* CSR format for 3x10 Jacobian with row-strided structure */ double expected_Ax[9] = {28.0, 7.0, 4.0, 0.0, 16.0, 0.0, 54.0, 27.0, 18.0}; int expected_Ap[4] = {0, 3, 6, 9}; int expected_Ai[9] = {1, 4, 7, 2, 5, 8, 3, 6, 9}; - mu_assert("vals fail", cmp_double_array(p->jacobian->x, expected_Ax, 9)); - mu_assert("rows fail", cmp_int_array(p->jacobian->p, expected_Ap, 4)); - mu_assert("cols fail", cmp_int_array(p->jacobian->i, expected_Ai, 9)); + mu_assert("vals fail", cmp_values(p->jacobian, expected_Ax, 9)); + mu_assert("sparsity fail", + cmp_sparsity(p->jacobian, expected_Ap, expected_Ai, 3, 9)); free_expr(p); return 0; diff --git a/tests/jacobian_tests/other/test_prod_axis_zero.h b/tests/jacobian_tests/other/test_prod_axis_zero.h index 42b4ad1..519a40c 100644 --- a/tests/jacobian_tests/other/test_prod_axis_zero.h +++ b/tests/jacobian_tests/other/test_prod_axis_zero.h @@ -31,14 +31,14 @@ const char *test_jacobian_prod_axis_zero(void) jacobian_init(p); p->eval_jacobian(p); - /* CSR format for 3x8 Jacobian with block diagonal structure */ + /* CSR format for 3x8 Jacobian with block diagonal structure */ double expected_Ax[6] = {2.0, 1.0, 4.0, 3.0, 6.0, 5.0}; int expected_Ap[4] = {0, 2, 4, 6}; int expected_Ai[6] = {1, 2, 3, 4, 5, 6}; - mu_assert("vals fail", cmp_double_array(p->jacobian->x, expected_Ax, 6)); - mu_assert("rows fail", cmp_int_array(p->jacobian->p, expected_Ap, 4)); - mu_assert("cols fail", cmp_int_array(p->jacobian->i, expected_Ai, 6)); + mu_assert("vals fail", cmp_values(p->jacobian, expected_Ax, 6)); + mu_assert("sparsity fail", + cmp_sparsity(p->jacobian, expected_Ap, expected_Ai, 3, 6)); free_expr(p); return 0; diff --git 
a/tests/jacobian_tests/other/test_quad_form.h b/tests/jacobian_tests/other/test_quad_form.h index d832a83..22d33c0 100644 --- a/tests/jacobian_tests/other/test_quad_form.h +++ b/tests/jacobian_tests/other/test_quad_form.h @@ -14,7 +14,7 @@ const char *test_quad_form(void) // Q = [1 2 0; 2 3 0; 0 0 4] double u_vals[5] = {0, 0, 1, 2, 3}; expr *x = new_variable(3, 1, 2, 5); - CSR_Matrix *Q = new_csr_matrix(3, 3, 5); + CSR_matrix *Q = new_CSR_matrix(3, 3, 5); double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; int Qi[5] = {0, 1, 0, 1, 2}; int Qp[4] = {0, 2, 4, 5}; @@ -31,11 +31,11 @@ const char *test_quad_form(void) int expected_Ap[2] = {0, 3}; int expected_Ai[3] = {2, 3, 4}; - mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_Ax, 3)); - mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 2)); - mu_assert("cols fail", cmp_int_array(node->jacobian->i, expected_Ai, 3)); + mu_assert("vals fail", cmp_values(node->jacobian, expected_Ax, 3)); + mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 1, 3)); free_expr(node); - free_csr_matrix(Q); + free_CSR_matrix(Q); return 0; } @@ -51,7 +51,7 @@ src/other/quad_form.c. 
const char *test_quad_form2(void) // 1 0 0 2 0 1] double u_vals[6] = {1, 2, 3, 4, 5, 6}; expr *u = new_variable(6, 1, 0, 6); -CSR_Matrix *Q = new_csr_matrix(3, 3, 5); +CSR_matrix *Q = new_CSR_matrix(3, 3, 5); double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; int Qi[5] = {0, 1, 0, 1, 2}; int Qp[4] = {0, 2, 4, 5}; @@ -59,7 +59,7 @@ memcpy(Q->x, Qx, 5 * sizeof(double)); memcpy(Q->i, Qi, 5 * sizeof(int)); memcpy(Q->p, Qp, 4 * sizeof(int)); -CSR_Matrix *A = new_csr_matrix(3, 6, 10); +CSR_matrix *A = new_CSR_matrix(3, 6, 10); double Ax[10] = {1.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6, 1.0, 2.0, 1.0}; int Ai[10] = {0, 2, 3, 4, 2, 3, 4, 0, 3, 5}; int Ap[4] = {0, 4, 7, 10}; @@ -77,13 +77,13 @@ double expected_Ax[5] = {422, 2222, 3244, 3786, 120}; int expected_Ap[2] = {0, 5}; int expected_Ai[5] = {0, 2, 3, 4, 5}; -mu_assert("vals fail", cmp_double_array(node->jacobian->x, expected_Ax, 5)); -mu_assert("rows fail", cmp_int_array(node->jacobian->p, expected_Ap, 2)); -mu_assert("cols fail", cmp_int_array(node->jacobian->i, expected_Ai, 5)); +mu_assert("vals fail", cmp_values(node->jacobian, expected_Ax, 5)); +mu_assert("sparsity fail", + cmp_sparsity(node->jacobian, expected_Ap, expected_Ai, 1, 5)); free_expr(node); free_expr(Au); -free_csr_matrix(Q); -free_csr_matrix(A); +free_CSR_matrix(Q); +free_CSR_matrix(A); return 0; } */ diff --git a/tests/numerical_diff.c b/tests/numerical_diff.c index 9c9812a..b79547d 100644 --- a/tests/numerical_diff.c +++ b/tests/numerical_diff.c @@ -13,7 +13,7 @@ static int is_close(double a, double b) return fabs(a - b) <= fmax(ABS_TOL, REL_TOL * fmax(fabs(a), fabs(b))); } -static void csr_to_dense(const CSR_Matrix *A, double *dense) +static void csr_to_dense(const CSR_matrix *A, double *dense) { for (int row = 0; row < A->m; row++) { @@ -76,7 +76,7 @@ int check_jacobian_num(expr *node, const double *u, double h) node->forward(node, u); double *J_analytical = calloc((size_t) m * n, sizeof(double)); - csr_to_dense(node->jacobian, J_analytical); + 
csr_to_dense(node->jacobian->to_csr(node->jacobian), J_analytical); int result = 1; for (int i = 0; i < m * n; i++) @@ -98,9 +98,9 @@ int check_jacobian_num(expr *node, const double *u, double h) return result; } -/* Compute g = J^T w where J is CSR (m x n) and w has m entries. +/* Compute g = J^T w where J is CSR_matrix (m x n) and w has m entries. * Result written into g (size n), which must be zero-initialized. */ -static void csr_transpose_mult_vec(const CSR_Matrix *J, const double *w, double *g) +static void csr_transpose_mult_vec(const CSR_matrix *J, const double *w, double *g) { for (int row = 0; row < J->m; row++) { @@ -127,6 +127,12 @@ double *numerical_wsum_hess(expr *node, const double *u, const double *w, double memcpy(u_work, u, n * sizeof(double)); + /* Hoist the CSR_matrix view once. For sparse_matrix (the only type used by tests + that reach here), csr->x aliases node->jacobian->x, so eval_jacobian + writes inside the loop update jac->x in place. A PD-backed Jacobian + would need a per-iteration to_csr refresh; not exercised today. 
*/ + CSR_matrix *jac = node->jacobian->to_csr(node->jacobian); + for (int j = 0; j < n; j++) { /* g(u + h*e_j) */ @@ -134,14 +140,14 @@ double *numerical_wsum_hess(expr *node, const double *u, const double *w, double node->forward(node, u_work); node->eval_jacobian(node); memset(g_plus, 0, n * sizeof(double)); - csr_transpose_mult_vec(node->jacobian, w, g_plus); + csr_transpose_mult_vec(jac, w, g_plus); /* g(u - h*e_j) */ u_work[j] = u[j] - h; node->forward(node, u_work); node->eval_jacobian(node); memset(g_minus, 0, n * sizeof(double)); - csr_transpose_mult_vec(node->jacobian, w, g_minus); + csr_transpose_mult_vec(jac, w, g_minus); u_work[j] = u[j]; @@ -171,7 +177,7 @@ int check_wsum_hess(expr *node, const double *u, const double *w, double h) node->eval_wsum_hess(node, w); double *H_ana = calloc((size_t) n * n, sizeof(double)); - csr_to_dense(node->wsum_hess, H_ana); + csr_to_dense(node->wsum_hess->to_csr(node->wsum_hess), H_ana); int result = 1; for (int i = 0; i < n * n; i++) diff --git a/tests/numerical_diff/test_numerical_diff.h b/tests/numerical_diff/test_numerical_diff.h index 8af633d..a43b718 100644 --- a/tests/numerical_diff/test_numerical_diff.h +++ b/tests/numerical_diff/test_numerical_diff.h @@ -9,7 +9,7 @@ const char *test_check_jacobian_composite_exp(void) { double u_vals[6] = {0, 0, 1, 2, 3, 0}; - CSR_Matrix *A = new_csr_matrix(2, 6, 6); + CSR_matrix *A = new_CSR_matrix(2, 6, 6); double Ax[6] = {3, 2, 1, 2, 1, 1}; int Ai[6] = {2, 3, 4, 2, 3, 4}; int Ap[3] = {0, 3, 6}; @@ -25,7 +25,7 @@ const char *test_check_jacobian_composite_exp(void) check_jacobian_num(exp_node, u_vals, NUMERICAL_DIFF_DEFAULT_H)); free_expr(exp_node); - free_csr_matrix(A); + free_CSR_matrix(A); return 0; } @@ -36,7 +36,7 @@ const char *test_check_wsum_hess_exp_composite(void) double Ax[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; int Ai[] = {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4}; int Ap[] = {0, 5, 10, 15}; - CSR_Matrix *A_csr = new_csr_matrix(3, 5, 15); + 
CSR_matrix *A_csr = new_CSR_matrix(3, 5, 15); memcpy(A_csr->x, Ax, 15 * sizeof(double)); memcpy(A_csr->i, Ai, 15 * sizeof(int)); memcpy(A_csr->p, Ap, 4 * sizeof(int)); @@ -49,6 +49,6 @@ const char *test_check_wsum_hess_exp_composite(void) check_wsum_hess(exp_node, u_vals, w, NUMERICAL_DIFF_DEFAULT_H)); free_expr(exp_node); - free_csr_matrix(A_csr); + free_CSR_matrix(A_csr); return 0; } diff --git a/tests/old-code/test_old_permuted_dense.h b/tests/old-code/test_old_permuted_dense.h new file mode 100644 index 0000000..335e5ff --- /dev/null +++ b/tests/old-code/test_old_permuted_dense.h @@ -0,0 +1,349 @@ +#ifndef TEST_OLD_PERMUTED_DENSE_H +#define TEST_OLD_PERMUTED_DENSE_H + +#include "minunit.h" +#include "old-code/old_permuted_dense.h" +#include "test_helpers.h" +#include "utils/CSR_matrix.h" +#include "utils/permuted_dense.h" +#include +#include + +/* Direct unit tests for the legacy CSR-pd BTA kernels in old-code. They no + longer sit on a production path (matrix_BTA dispatcher hard-wires the + CSC variants), but the kernels remain as reference implementations and + as the CSR side of the cross-comparison test in test_permuted_dense.h. */ + +const char *test_BTA_pd_csr_basic(void) +{ + /* CSR_matrix A: m=4, n=5, with nonzeros: + row 0: cols {1, 4} + row 1: cols {0, 2} + row 2: cols {2} + row 3: cols {1, 4} */ + CSR_matrix *A = new_CSR_matrix(4, 5, 7); + A->p[0] = 0; + A->p[1] = 2; + A->p[2] = 4; + A->p[3] = 5; + A->p[4] = 7; + int Ai[7] = {1, 4, 0, 2, 2, 1, 4}; + double Ax[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; + memcpy(A->i, Ai, sizeof Ai); + memcpy(A->x, Ax, sizeof Ax); + + /* PD B: m=4, n=4, row_perm = [1, 3], col_perm = [0, 2], X = [[10, 20], [30, + * 40]]. 
*/ + int row_perm_B[2] = {1, 3}; + int col_perm_B[2] = {0, 2}; + double XB[4] = {10.0, 20.0, 30.0, 40.0}; + matrix *B_m = new_permuted_dense(4, 4, 2, 2, row_perm_B, col_perm_B, XB); + permuted_dense *B = (permuted_dense *) B_m; + + matrix *out_m = BTA_pd_csr_alloc(B, A); + permuted_dense *out = (permuted_dense *) out_m; + + /* Expected col_active: union of A's columns in rows 1 and 3 + = {0, 2} ∪ {1, 4} = {0, 1, 2, 4}, size 4. */ + int expected_col_perm[4] = {0, 1, 2, 4}; + mu_assert("out m", out_m->m == 4); /* B.n */ + mu_assert("out n", out_m->n == 5); /* A.n */ + mu_assert("m0", out->m0 == 2); + mu_assert("n0", out->n0 == 4); + mu_assert("row_perm", cmp_int_array(out->row_perm, col_perm_B, 2)); + mu_assert("col_perm", cmp_int_array(out->col_perm, expected_col_perm, 4)); + + BTA_pd_csr_fill_values(B, A, out); + + /* Reference: scatter A and B to dense 4x{5,4}, compute B^T A, extract + block at (col_perm_B × out->col_perm). Scatter inlined locally to + avoid coupling to the static helpers in tests/utils/test_permuted_dense.h. 
*/ + double *A_d = (double *) calloc(4 * 5, sizeof(double)); + double *B_d = (double *) calloc(4 * 4, sizeof(double)); + for (int i = 0; i < A->m; i++) + for (int e = A->p[i]; e < A->p[i + 1]; e++) A_d[i * 5 + A->i[e]] = A->x[e]; + for (int kk = 0; kk < B->m0; kk++) + for (int jj = 0; jj < B->n0; jj++) + B_d[B->row_perm[kk] * 4 + B->col_perm[jj]] = B->X[kk * B->n0 + jj]; + + double C_ref[4 * 5]; + memset(C_ref, 0, sizeof C_ref); + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 5; j++) + { + double s = 0.0; + for (int k = 0; k < 4; k++) + { + s += B_d[k * 4 + i] * A_d[k * 5 + j]; + } + C_ref[i * 5 + j] = s; + } + } + double expected_X[8]; + for (int ii = 0; ii < 2; ii++) + { + for (int jj = 0; jj < 4; jj++) + { + expected_X[ii * 4 + jj] = + C_ref[col_perm_B[ii] * 5 + expected_col_perm[jj]]; + } + } + mu_assert("values", cmp_double_array(out->X, expected_X, 8)); + + free(A_d); + free(B_d); + free_matrix(out_m); + free_matrix(B_m); + free_CSR_matrix(A); + return 0; +} + +/* BTA(CSR_matrix A, PD B) where A is a leaf-variable Jacobian (identity-in-block). + A is (4, 8): row k has a 1 at column 4+k (variable v of size 4 at var_id=4). + Expected: col_perm_out = {4+row_perm_B[kk]} = {4+1, 4+3} = {5, 7}, and X_C = + X_B^T. 
*/ +const char *test_BTA_pd_csr_leaf_variable(void) +{ + CSR_matrix *A = new_CSR_matrix(4, 8, 4); + for (int k = 0; k < 4; k++) + { + A->p[k] = k; + A->i[k] = 4 + k; + A->x[k] = 1.0; + } + A->p[4] = 4; + + int row_perm_B[2] = {1, 3}; + int col_perm_B[2] = {0, 2}; + double XB[4] = {10.0, 20.0, 30.0, 40.0}; /* row-major (2, 2) */ + matrix *B_m = new_permuted_dense(4, 4, 2, 2, row_perm_B, col_perm_B, XB); + permuted_dense *B = (permuted_dense *) B_m; + + matrix *out_m = BTA_pd_csr_alloc(B, A); + permuted_dense *out = (permuted_dense *) out_m; + + int expected_col_perm[2] = {5, 7}; + mu_assert("m0", out->m0 == 2); + mu_assert("n0", out->n0 == 2); + mu_assert("row_perm", cmp_int_array(out->row_perm, col_perm_B, 2)); + mu_assert("col_perm", cmp_int_array(out->col_perm, expected_col_perm, 2)); + + BTA_pd_csr_fill_values(B, A, out); + + /* X_C should be X_B^T = [[10, 30], [20, 40]] row-major. */ + double expected_X[4] = {10.0, 30.0, 20.0, 40.0}; + mu_assert("values", cmp_double_array(out->X, expected_X, 4)); + + free_matrix(out_m); + free_matrix(B_m); + free_CSR_matrix(A); + return 0; +} + +/* BTA(CSR_matrix A, PD B) where A has no entries in any row of row_perm_B. + Output dense block should have n0 = 0. */ +const char *test_BTA_pd_csr_no_overlap(void) +{ + /* A: rows 0 and 2 have entries; rows 1 and 3 (row_perm_B) are empty. 
*/ + CSR_matrix *A = new_CSR_matrix(4, 5, 3); + A->p[0] = 0; + A->p[1] = 2; + A->p[2] = 2; + A->p[3] = 3; + A->p[4] = 3; + int Ai[3] = {1, 4, 2}; + double Ax[3] = {1.0, 2.0, 3.0}; + memcpy(A->i, Ai, sizeof Ai); + memcpy(A->x, Ax, sizeof Ax); + + int row_perm_B[2] = {1, 3}; /* rows that ARE empty in A */ + int col_perm_B[2] = {0, 2}; + double XB[4] = {1.0, 2.0, 3.0, 4.0}; + matrix *B_m = new_permuted_dense(4, 4, 2, 2, row_perm_B, col_perm_B, XB); + permuted_dense *B = (permuted_dense *) B_m; + + matrix *out_m = BTA_pd_csr_alloc(B, A); + permuted_dense *out = (permuted_dense *) out_m; + + mu_assert("m0", out->m0 == 2); + mu_assert("n0", out->n0 == 0); + + /* Fill should be a no-op (0-sized dense block). */ + BTA_pd_csr_fill_values(B, A, out); + + free_matrix(out_m); + free_matrix(B_m); + free_CSR_matrix(A); + return 0; +} + +/* Tests for the production CSR-pd kernel pair (B=CSR, A=PD). The BTA fill + variant lives here in old-code because production only calls the BTDA + path; the alloc is still in src/utils/permuted_dense.c. */ + +/* BTA(CSR_matrix B, PD A): basic correctness against a dense reference. + A is (4, 5) PD with row_perm = [1, 3], col_perm = [0, 2], dense block (2, 2). + B is (4, 4) CSR_matrix with arbitrary sparsity. */ +const char *test_BTA_csr_pd_basic(void) +{ + /* PD A: m=4, n=5, row_perm = [1, 3], col_perm = [0, 2]. + X = [[1, 2], [3, 4]] (2 x 2 row-major). */ + int row_perm_A[2] = {1, 3}; + int col_perm_A[2] = {0, 2}; + double XA[4] = {1.0, 2.0, 3.0, 4.0}; + matrix *A_m = new_permuted_dense(4, 5, 2, 2, row_perm_A, col_perm_A, XA); + permuted_dense *A = (permuted_dense *) A_m; + + /* CSR_matrix B: m=4, n=4. 
+ row 0: cols {1, 3} + row 1: cols {0, 2} + row 2: cols {2} + row 3: cols {0, 3} */ + CSR_matrix *B = new_CSR_matrix(4, 4, 7); + B->p[0] = 0; + B->p[1] = 2; + B->p[2] = 4; + B->p[3] = 5; + B->p[4] = 7; + int Bi[7] = {1, 3, 0, 2, 2, 0, 3}; + double Bx[7] = {10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0}; + memcpy(B->i, Bi, sizeof Bi); + memcpy(B->x, Bx, sizeof Bx); + + matrix *out_m = BTA_csr_pd_alloc(B, A); + permuted_dense *out = (permuted_dense *) out_m; + + /* row_active = union of B's cols in rows 1 and 3 + = {0, 2} ∪ {0, 3} = {0, 2, 3}, size 3. */ + int expected_row_perm[3] = {0, 2, 3}; + mu_assert("out m", out_m->m == 4); /* B.n */ + mu_assert("out n", out_m->n == 5); /* A.n */ + mu_assert("m0", out->m0 == 3); + mu_assert("n0", out->n0 == 2); + mu_assert("row_perm", cmp_int_array(out->row_perm, expected_row_perm, 3)); + mu_assert("col_perm", cmp_int_array(out->col_perm, col_perm_A, 2)); + + BTA_csr_pd_fill_values(B, A, out); + + /* Reference: dense B^T A, extract block at (row_active × col_perm_A). + Scatter inlined locally to avoid coupling to static helpers. 
*/ + double *A_d = (double *) calloc(4 * 5, sizeof(double)); + double *B_d = (double *) calloc(4 * 4, sizeof(double)); + for (int kk = 0; kk < A->m0; kk++) + for (int jj = 0; jj < A->n0; jj++) + A_d[A->row_perm[kk] * 5 + A->col_perm[jj]] = A->X[kk * A->n0 + jj]; + for (int i = 0; i < B->m; i++) + for (int e = B->p[i]; e < B->p[i + 1]; e++) B_d[i * 4 + B->i[e]] = B->x[e]; + + double C_ref[4 * 5]; + memset(C_ref, 0, sizeof C_ref); + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 5; j++) + { + double s = 0.0; + for (int k = 0; k < 4; k++) + { + s += B_d[k * 4 + i] * A_d[k * 5 + j]; + } + C_ref[i * 5 + j] = s; + } + } + double expected_X[6]; + for (int ii = 0; ii < 3; ii++) + { + for (int jj = 0; jj < 2; jj++) + { + expected_X[ii * 2 + jj] = + C_ref[expected_row_perm[ii] * 5 + col_perm_A[jj]]; + } + } + mu_assert("values", cmp_double_array(out->X, expected_X, 6)); + + free(A_d); + free(B_d); + free_matrix(out_m); + free_CSR_matrix(B); + free_matrix(A_m); + return 0; +} + +/* BTA(CSR_matrix B, PD A) where B is a leaf-variable Jacobian (identity-in-block). + B is (4, 8): row k has a 1 at column 4+k (variable v of size 4 at var_id=4). + Expected: row_perm_out = {4+row_perm_A[kk]} = {4+1, 4+3} = {5, 7}, X_C = X_A. 
*/ +const char *test_BTA_csr_pd_leaf_variable(void) +{ + int row_perm_A[2] = {1, 3}; + int col_perm_A[2] = {0, 2}; + double XA[4] = {1.0, 2.0, 3.0, 4.0}; + matrix *A_m = new_permuted_dense(4, 5, 2, 2, row_perm_A, col_perm_A, XA); + permuted_dense *A = (permuted_dense *) A_m; + + CSR_matrix *B = new_CSR_matrix(4, 8, 4); + for (int k = 0; k < 4; k++) + { + B->p[k] = k; + B->i[k] = 4 + k; + B->x[k] = 1.0; + } + B->p[4] = 4; + + matrix *out_m = BTA_csr_pd_alloc(B, A); + permuted_dense *out = (permuted_dense *) out_m; + + int expected_row_perm[2] = {5, 7}; + mu_assert("m0", out->m0 == 2); + mu_assert("n0", out->n0 == 2); + mu_assert("row_perm", cmp_int_array(out->row_perm, expected_row_perm, 2)); + mu_assert("col_perm", cmp_int_array(out->col_perm, col_perm_A, 2)); + + BTA_csr_pd_fill_values(B, A, out); + + /* X_C should equal X_A. */ + mu_assert("values", cmp_double_array(out->X, XA, 4)); + + free_matrix(out_m); + free_CSR_matrix(B); + free_matrix(A_m); + return 0; +} + +/* BTA(CSR_matrix B, PD A) where B has no entries in any row of row_perm_A. + Output dense block should have m0 = 0. */ +const char *test_BTA_csr_pd_no_overlap(void) +{ + int row_perm_A[2] = {1, 3}; + int col_perm_A[2] = {0, 2}; + double XA[4] = {1.0, 2.0, 3.0, 4.0}; + matrix *A_m = new_permuted_dense(4, 5, 2, 2, row_perm_A, col_perm_A, XA); + permuted_dense *A = (permuted_dense *) A_m; + + /* B: rows 0 and 2 have entries; rows 1 and 3 (row_perm_A) are empty. */ + CSR_matrix *B = new_CSR_matrix(4, 4, 3); + B->p[0] = 0; + B->p[1] = 2; + B->p[2] = 2; + B->p[3] = 3; + B->p[4] = 3; + int Bi[3] = {0, 1, 2}; + double Bx[3] = {1.0, 2.0, 3.0}; + memcpy(B->i, Bi, sizeof Bi); + memcpy(B->x, Bx, sizeof Bx); + + matrix *out_m = BTA_csr_pd_alloc(B, A); + permuted_dense *out = (permuted_dense *) out_m; + + mu_assert("m0", out->m0 == 0); + mu_assert("n0", out->n0 == 2); + + /* Fill should be a no-op (0-sized dense block on the row axis). 
*/ + BTA_csr_pd_fill_values(B, A, out); + + free_matrix(out_m); + free_CSR_matrix(B); + free_matrix(A_m); + return 0; +} + +#endif /* TEST_OLD_PERMUTED_DENSE_H */ diff --git a/tests/problem/test_problem.h b/tests/problem/test_problem.h index fbb512f..d245cfa 100644 --- a/tests/problem/test_problem.h +++ b/tests/problem/test_problem.h @@ -175,7 +175,7 @@ const char *test_problem_jacobian(void) problem_constraint_forward(prob, u); problem_jacobian(prob); - CSR_Matrix *jac = prob->jacobian; + CSR_matrix *jac = prob->jacobian; /* Check dimensions */ mu_assert("jac rows wrong", jac->m == 2); @@ -239,7 +239,7 @@ const char *test_problem_jacobian_multi(void) problem_constraint_forward(prob, u); problem_jacobian(prob); - CSR_Matrix *jac = prob->jacobian; + CSR_matrix *jac = prob->jacobian; /* Check dimensions: 4 rows (2 + 2), 2 cols */ mu_assert("jac rows wrong", jac->m == 4); @@ -322,7 +322,7 @@ const char *test_problem_hessian(void) double w_obj = 2.0; problem_hessian(prob, w_obj, w); - CSR_Matrix *H = prob->lagrange_hessian; + CSR_matrix *H = prob->lagrange_hessian; /* Check dimensions: 3x3 symmetric */ mu_assert("H rows wrong", H->m == 3); diff --git a/tests/profiling/profile_BTA_pd_csr_vs_csc.h b/tests/profiling/profile_BTA_pd_csr_vs_csc.h new file mode 100644 index 0000000..df1cc46 --- /dev/null +++ b/tests/profiling/profile_BTA_pd_csr_vs_csc.h @@ -0,0 +1,142 @@ +#ifndef PROFILE_BTA_PD_CSR_VS_CSC_H +#define PROFILE_BTA_PD_CSR_VS_CSC_H + +#include +#include +#include +#include + +#include "minunit.h" +#include "old-code/old_permuted_dense.h" +#include "test_helpers.h" +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" +#include "utils/Timer.h" +#include "utils/permuted_dense.h" +#include "utils/utils.h" + +/* Microbenchmark: compare BTA_csr_pd vs BTA_csc_pd on trimmed_log_reg-shaped + (m=2000, n0_B=785) inputs at two A densities. Output is one fill timing + pair per row of the table. 
*/ +static void run_bench_one_density(int m, int n0_B, int n_A, int nnz_per_row, + int N_ITERS, const char *label) +{ + /* B: PD with full m × n0_B dense block (row_perm = 0..m-1, col_perm = + 0..n0_B-1). Values arbitrary. */ + int *row_perm_B = (int *) malloc(m * sizeof(int)); + int *col_perm_B = (int *) malloc(n0_B * sizeof(int)); + double *XB = (double *) malloc(m * n0_B * sizeof(double)); + for (int i = 0; i < m; i++) row_perm_B[i] = i; + for (int j = 0; j < n0_B; j++) col_perm_B[j] = j; + for (int k = 0; k < m * n0_B; k++) XB[k] = (double) (k % 37) * 0.013 + 0.1; + + /* B's global shape: (m, n_B_global). Pick n_B_global = n0_B (no padding). */ + matrix *B_m = new_permuted_dense(m, n0_B, m, n0_B, row_perm_B, col_perm_B, XB); + permuted_dense *B = (permuted_dense *) B_m; + + /* A: (m × n_A) CSR with `nnz_per_row` evenly-spaced nonzeros per row. */ + int total_nnz = m * nnz_per_row; + CSR_matrix *A_csr = new_CSR_matrix(m, n_A, total_nnz); + for (int row = 0; row <= m; row++) A_csr->p[row] = row * nnz_per_row; + srand(42); + for (int row = 0; row < m; row++) + { + /* Pick nnz_per_row distinct columns by sorted random sampling. */ + int *cols = (int *) malloc(nnz_per_row * sizeof(int)); + int picked = 0; + while (picked < nnz_per_row) + { + int c = rand() % n_A; + int dup = 0; + for (int k = 0; k < picked; k++) + if (cols[k] == c) + { + dup = 1; + break; + } + if (!dup) cols[picked++] = c; + } + /* Insertion sort to keep CSR column-index invariant. */ + for (int a = 1; a < nnz_per_row; a++) + { + int v = cols[a]; + int b = a - 1; + while (b >= 0 && cols[b] > v) + { + cols[b + 1] = cols[b]; + b--; + } + cols[b + 1] = v; + } + for (int k = 0; k < nnz_per_row; k++) + { + int e = A_csr->p[row] + k; + A_csr->i[e] = cols[k]; + A_csr->x[e] = (double) ((row * 31 + cols[k]) % 53) * 0.027 + 0.05; + } + free(cols); + } + + /* CSC view of A. 
*/ + int *iwork = (int *) malloc(MAX(m, n_A) * sizeof(int)); + CSC_matrix *A_csc = csr_to_csc_alloc(A_csr, iwork); + csr_to_csc_fill_values(A_csr, A_csc, iwork); + + /* Allocate outputs once for each variant. */ + matrix *C_csr_m = BTA_pd_csr_alloc(B, A_csr); + permuted_dense *C_csr = (permuted_dense *) C_csr_m; + matrix *C_csc_m = BTA_pd_csc_alloc(B, A_csc); + permuted_dense *C_csc = (permuted_dense *) C_csc_m; + + /* d for BTDA: all ones, so C = B^T diag(d) A = B^T A. */ + double *d_ones = (double *) malloc(m * sizeof(double)); + for (int i = 0; i < m; i++) d_ones[i] = 1.0; + + /* Warm-up + time CSR fill. */ + Timer t1; + BTDA_pd_csr_fill_values(B, d_ones, A_csr, C_csr); + clock_gettime(CLOCK_MONOTONIC, &t1.start); + for (int it = 0; it < N_ITERS; it++) + BTDA_pd_csr_fill_values(B, d_ones, A_csr, C_csr); + clock_gettime(CLOCK_MONOTONIC, &t1.end); + double t_csr_ms = GET_ELAPSED_SECONDS(t1) * 1000.0 / N_ITERS; + + /* Warm-up + time CSC fill. */ + Timer t2; + BTDA_pd_csc_fill_values(B, d_ones, A_csc, C_csc); + clock_gettime(CLOCK_MONOTONIC, &t2.start); + for (int it = 0; it < N_ITERS; it++) + BTDA_pd_csc_fill_values(B, d_ones, A_csc, C_csc); + clock_gettime(CLOCK_MONOTONIC, &t2.end); + double t_csc_ms = GET_ELAPSED_SECONDS(t2) * 1000.0 / N_ITERS; + + printf(" %-22s CSR = %7.3f ms CSC = %7.3f ms ratio CSR/CSC = %.2fx\n", + label, t_csr_ms, t_csc_ms, t_csr_ms / t_csc_ms); + + free_matrix(C_csr_m); + free_matrix(C_csc_m); + free_matrix(B_m); + free_CSR_matrix(A_csr); + free_CSC_matrix(A_csc); + free(iwork); + free(row_perm_B); + free(col_perm_B); + free(XB); + free(d_ones); +} + +const char *profile_BTA_pd_csr_vs_csc(void) +{ + int m = 2000; + int n0_B = 785; + int n_A = 2000; + int N_ITERS = 50; + + printf("\nBTA pd × sparse fill benchmark (m=%d, n0_B=%d, n_A=%d, %d iters):\n", + m, n0_B, n_A, N_ITERS); + run_bench_one_density(m, n0_B, n_A, 1, N_ITERS, "leaf-var (1 nnz/row):"); + run_bench_one_density(m, n0_B, n_A, 50, N_ITERS, "dense-ish (50 nnz/row):"); + return 0; 
+} + +#endif /* PROFILE_BTA_PD_CSR_VS_CSC_H */ diff --git a/tests/profiling/profile_left_matmul.h b/tests/profiling/profile_left_matmul.h index 4644a41..db6b832 100644 --- a/tests/profiling/profile_left_matmul.h +++ b/tests/profiling/profile_left_matmul.h @@ -13,10 +13,11 @@ const char *profile_left_matmul(void) { - /* A @ X where A is 50 x 50 dense stored in CSR and X is 50 x 50 variable */ + /* A @ X where A is 50 x 50 dense stored in CSR_matrix and X is 50 x 50 variable + */ int n = 100; expr *X = new_variable(n, n, 0, n * n); - CSR_Matrix *A = new_csr_matrix(n, n, n * n); + CSR_matrix *A = new_CSR_matrix(n, n, n * n); for (int i = 0; i < n * n; i++) { A->x[i] = 1.0; /* dense matrix of all ones */ @@ -56,7 +57,7 @@ const char *profile_left_matmul(void) GET_ELAPSED_SECONDS(timer)); free(x_vals); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(AX); return 0; } diff --git a/tests/profiling/profile_log_reg.h b/tests/profiling/profile_log_reg.h new file mode 100644 index 0000000..31bc075 --- /dev/null +++ b/tests/profiling/profile_log_reg.h @@ -0,0 +1,177 @@ +#ifndef PROFILE_LOG_REG_H +#define PROFILE_LOG_REG_H + +#include +#include +#include +#include + +#include "atoms/affine.h" +#include "atoms/elementwise_full_dom.h" +#include "expr.h" +#include "minunit.h" +#include "utils/CSR_sum.h" +#include "utils/Timer.h" +#include "utils/permuted_dense.h" + +/* Profile and validate Jacobian + Hessian of obj = sum(logistic(A x)). + + Path A: the engine's expression DAG (CSR_matrix/CSC_matrix chain rule). + Path B: hardcoded chain rule using permuted_dense kernels for the dense + linear algebra (DA and ATDA), plus the engine's CSR_matrix row-sum + primitives for J_sum. + + Forward pass is excluded from timing. 
*/ +const char *profile_log_reg(void) +{ + int m = 2000; + int n = 785; + + /* ---- Random A and initial x ---- */ + double *A_data = (double *) malloc((size_t) m * n * sizeof(double)); + double *u = (double *) malloc(n * sizeof(double)); + srand(42); + for (int i = 0; i < m * n; i++) + { + A_data[i] = (double) rand() / RAND_MAX - 0.5; + } + for (int i = 0; i < n; i++) + { + u[i] = (double) rand() / RAND_MAX - 0.5; + } + + /* ---- Build expression DAG (shared by both paths) ---- */ + expr *x = new_variable(n, 1, 0, n); + expr *Ax = new_left_matmul_dense(NULL, x, m, n, A_data); + expr *log_obj = new_logistic(Ax); + expr *obj = new_sum(log_obj, -1); + jacobian_init(obj); + wsum_hess_init(obj); + + /* Forward (untimed). */ + obj->forward(obj, u); + + /* ---- Path A: time eval_jacobian and eval_wsum_hess separately ---- */ + Timer t_a_jac, t_a_hess; + double w_one = 1.0; + clock_gettime(CLOCK_MONOTONIC, &t_a_jac.start); + obj->eval_jacobian(obj); + clock_gettime(CLOCK_MONOTONIC, &t_a_jac.end); + clock_gettime(CLOCK_MONOTONIC, &t_a_hess.start); + obj->eval_wsum_hess(obj, &w_one); + clock_gettime(CLOCK_MONOTONIC, &t_a_hess.end); + double sec_a_jac = GET_ELAPSED_SECONDS(t_a_jac); + double sec_a_hess = GET_ELAPSED_SECONDS(t_a_hess); + + /* ---- Path B setup (untimed) ---- */ + int *full_rows = (int *) malloc(m * sizeof(int)); + int *full_cols = (int *) malloc(n * sizeof(int)); + for (int i = 0; i < m; i++) full_rows[i] = i; + for (int j = 0; j < n; j++) full_cols[j] = j; + + matrix *A_pd_M = new_permuted_dense(m, n, m, n, full_rows, full_cols, A_data); + permuted_dense *A_pd = (permuted_dense *) A_pd_M; + matrix *Jlog_M = new_permuted_dense(m, n, m, n, full_rows, full_cols, NULL); + permuted_dense *Jlog_pd = (permuted_dense *) Jlog_M; + matrix *H_pd_M = ATA_pd_alloc(A_pd); + permuted_dense *H_pd = (permuted_dense *) H_pd_M; + + free(full_rows); + free(full_cols); + + /* CSR_matrix scaffolding for the row-sum step (PD owns the cached CSR_matrix + * view). 
*/ + CSR_matrix *Jlog_csr = Jlog_M->to_csr(Jlog_M); + CSR_matrix *Jobj_csr = new_CSR_matrix(1, n, n); + int *iwork = (int *) malloc((size_t) m * n * sizeof(int)); + int *idx_map = (int *) malloc((size_t) m * n * sizeof(int)); + sum_all_rows_csr_alloc(Jlog_csr, Jobj_csr, iwork, idx_map); + + double *d2 = (double *) malloc(m * sizeof(double)); + double *w_ones = (double *) malloc(m * sizeof(double)); + for (int i = 0; i < m; i++) w_ones[i] = 1.0; + + /* ---- Path B: time the manual chain rule, Jacobian and Hessian separately ---- + */ + Timer t_b_jac, t_b_hess; + /* dwork = sigmoid(z); used as the diagonal in DA below and (still in + dwork) as sigmas read by local_wsum_hess. */ + clock_gettime(CLOCK_MONOTONIC, &t_b_jac.start); + log_obj->local_jacobian(log_obj, log_obj->work->dwork); + DA_pd_fill_values(log_obj->work->dwork, A_pd, Jlog_pd); + memset(Jobj_csr->x, 0, Jobj_csr->nnz * sizeof(double)); + accumulator(Jlog_csr->x, Jlog_csr->nnz, idx_map, Jobj_csr->x); + clock_gettime(CLOCK_MONOTONIC, &t_b_jac.end); + clock_gettime(CLOCK_MONOTONIC, &t_b_hess.start); + log_obj->local_wsum_hess(log_obj, d2, w_ones); + ATDA_pd_fill_values(A_pd, d2, H_pd); + clock_gettime(CLOCK_MONOTONIC, &t_b_hess.end); + double sec_b_jac = GET_ELAPSED_SECONDS(t_b_jac); + double sec_b_hess = GET_ELAPSED_SECONDS(t_b_hess); + + printf("\n"); + printf(" Jacobian Hessian Total\n"); + printf(" Path A (engine CSR_matrix/CSC_matrix): %10.6fs %10.6fs %10.6fs\n", + sec_a_jac, sec_a_hess, sec_a_jac + sec_a_hess); + printf(" Path B (permuted_dense): %10.6fs %10.6fs %10.6fs\n", sec_b_jac, + sec_b_hess, sec_b_jac + sec_b_hess); + printf(" Speedup (A / B): %10.2fx %10.2fx %10.2fx\n", + sec_a_jac / sec_b_jac, sec_a_hess / sec_b_hess, + (sec_a_jac + sec_a_hess) / (sec_b_jac + sec_b_hess)); + + /* ---- Compare Jacobian (1 x n, both have full sparsity) ---- */ + CSR_matrix *J_a = obj->jacobian->to_csr(obj->jacobian); + mu_assert("J n mismatch", J_a->n == Jobj_csr->n); + mu_assert("J nnz mismatch", J_a->nnz == 
Jobj_csr->nnz); + double max_J_diff = 0.0; + for (int j = 0; j < J_a->nnz; j++) + { + double diff = fabs(J_a->x[j] - Jobj_csr->x[j]); + if (diff > max_J_diff) max_J_diff = diff; + } + printf(" Jacobian max abs diff: %10.3e\n", max_J_diff); + mu_assert("Jacobian mismatch", max_J_diff < 1e-10); + + /* ---- Compare Hessian (n x n): scatter Path A's CSR_matrix into a dense + n x n array, compare to H_pd->X (already dense row-major). + Extract the CSR_matrix view ONCE: PD's to_csr does an O(m0 * n0) + memcpy refresh per call, so calling it inside the inner loop is + quadratically expensive. ---- */ + CSR_matrix *H_a = obj->wsum_hess->to_csr(obj->wsum_hess); + double *H_a_dense = (double *) calloc((size_t) n * n, sizeof(double)); + for (int i = 0; i < n; i++) + { + for (int e = H_a->p[i]; e < H_a->p[i + 1]; e++) + { + int col = H_a->i[e]; + H_a_dense[i * n + col] = H_a->x[e]; + } + } + double max_H_diff = 0.0; + for (size_t k = 0; k < (size_t) n * n; k++) + { + double diff = fabs(H_a_dense[k] - H_pd->X[k]); + if (diff > max_H_diff) max_H_diff = diff; + } + printf(" Hessian max abs diff: %10.3e\n", max_H_diff); + mu_assert("Hessian mismatch", max_H_diff < 1e-10); + + /* ---- Cleanup ---- */ + free(H_a_dense); + free(d2); + free(w_ones); + free(iwork); + free(idx_map); + free_CSR_matrix(Jobj_csr); + /* Jlog_csr is owned by Jlog_M's cache; released by free_matrix below. 
*/ + free_matrix(H_pd_M); + free_matrix(Jlog_M); + free_matrix(A_pd_M); + free_expr(obj); + free(A_data); + free(u); + + return 0; +} + +#endif /* PROFILE_LOG_REG_H */ diff --git a/tests/profiling/profile_trimmed_log_reg.h b/tests/profiling/profile_trimmed_log_reg.h new file mode 100644 index 0000000..3fe2ae6 --- /dev/null +++ b/tests/profiling/profile_trimmed_log_reg.h @@ -0,0 +1,96 @@ +#ifndef PROFILE_TRIMMED_LOG_REG_H +#define PROFILE_TRIMMED_LOG_REG_H + +#include +#include + +#include "atoms/affine.h" +#include "atoms/bivariate_full_dom.h" +#include "atoms/elementwise_full_dom.h" +#include "expr.h" +#include "minunit.h" +#include "subexpr.h" +#include "utils/Timer.h" + +/* Profile Jacobian + Hessian of: + obj = sum( w ∘ logistic( -(y ∘ (A·theta)) ) ) + + theta (n x 1), w (m x 1) : variables (n_vars = n + m) + A (m x n) : dense constant + y (m x 1) : constant in {-1, +1}, wrapped as PARAM_FIXED + + Forward pass is excluded from timing. */ +const char *profile_trimmed_log_reg(void) +{ + int m = 2000; + int n = 785; + int N_HESS_ITERS = 10; + int n_vars = n + m; + + /* ---- Random inputs ---- */ + srand(42); + double *A_data = (double *) malloc((size_t) m * n * sizeof(double)); + double *y_data = (double *) malloc((size_t) m * sizeof(double)); + double *u = (double *) malloc((size_t) n_vars * sizeof(double)); + for (int i = 0; i < m * n; i++) + { + A_data[i] = (double) rand() / RAND_MAX - 0.5; + } + for (int i = 0; i < m; i++) + { + y_data[i] = (rand() % 2 == 0) ? 
1.0 : -1.0; + } + for (int i = 0; i < n_vars; i++) + { + u[i] = (double) rand() / RAND_MAX - 0.5; + } + + /* ---- Build expression DAG ---- */ + expr *theta = new_variable(n, 1, 0, n_vars); + expr *w = new_variable(m, 1, n, n_vars); + + expr *y_param = new_parameter(m, 1, PARAM_FIXED, n_vars, y_data); + + expr *A_theta = new_left_matmul_dense(NULL, theta, m, n, A_data); + expr *y_A_theta = new_vector_mult(y_param, A_theta); + expr *neg_node = new_neg(y_A_theta); + expr *sig = new_logistic(neg_node); + expr *w_sig = new_elementwise_mult(w, sig); + expr *obj = new_sum(w_sig, -1); + + jacobian_init(obj); + wsum_hess_init(obj); + + /* Forward (untimed). */ + obj->forward(obj, u); + + double w_one = 1.0; + Timer t_jac, t_hess; + clock_gettime(CLOCK_MONOTONIC, &t_jac.start); + obj->eval_jacobian(obj); + clock_gettime(CLOCK_MONOTONIC, &t_jac.end); + + obj->eval_wsum_hess(obj, &w_one); /* warm-up */ + clock_gettime(CLOCK_MONOTONIC, &t_hess.start); + for (int it = 0; it < N_HESS_ITERS; it++) + { + obj->eval_wsum_hess(obj, &w_one); + } + clock_gettime(CLOCK_MONOTONIC, &t_hess.end); + + double sec_jac = GET_ELAPSED_SECONDS(t_jac); + double sec_hess = GET_ELAPSED_SECONDS(t_hess) / N_HESS_ITERS; + + printf("\n"); + printf("trimmed_log_reg (m=%d, n=%d):\n", m, n); + printf(" jacobian = %10.6fs hessian = %10.6fs (avg over %d)\n", sec_jac, + sec_hess, N_HESS_ITERS); + + free_expr(obj); + free(A_data); + free(y_data); + free(u); + return 0; +} + +#endif /* PROFILE_TRIMMED_LOG_REG_H */ diff --git a/tests/test_helpers.c b/tests/test_helpers.c index 90d8446..7d5244a 100644 --- a/tests/test_helpers.c +++ b/tests/test_helpers.c @@ -4,7 +4,8 @@ #include #include "expr.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" +#include "utils/matrix.h" #define EPSILON 1e-7 @@ -44,6 +45,32 @@ int cmp_int_array(const int *actual, const int *expected, int size) return 1; } +int cmp_sparsity(matrix *M, const int *exp_p, const int *exp_i, int m, int nnz) +{ + if (M->m != m) + { + 
printf(" FAILED: M->m = %d, expected %d\n", M->m, m); + return 0; + } + if (M->nnz != nnz) + { + printf(" FAILED: M->nnz = %d, expected %d\n", M->nnz, nnz); + return 0; + } + CSR_matrix *csr = M->to_csr(M); + return cmp_int_array(csr->p, exp_p, m + 1) && cmp_int_array(csr->i, exp_i, nnz); +} + +int cmp_values(const matrix *M, const double *exp_x, int nnz) +{ + if (M->nnz != nnz) + { + printf(" FAILED: M->nnz = %d, expected %d\n", M->nnz, nnz); + return 0; + } + return cmp_double_array(M->x, exp_x, nnz); +} + #ifndef M_PI #define M_PI 3.14159265358979323846 #endif @@ -56,7 +83,7 @@ static double randn(void) return sqrt(-2.0 * log(u1)) * cos(2.0 * M_PI * u2); } -CSR_Matrix *new_csr_random(int m, int n, double density) +CSR_matrix *new_csr_random(int m, int n, double density) { /* Single pass: over-allocate, fill, then copy to exact size */ int cap = (int) ((double) m * (double) n * density * 1.5) + m; @@ -87,7 +114,7 @@ CSR_Matrix *new_csr_random(int m, int n, double density) } tmp_p[m] = nnz; - CSR_Matrix *A = new_csr_matrix(m, n, nnz); + CSR_matrix *A = new_CSR_matrix(m, n, nnz); memcpy(A->p, tmp_p, ((size_t) m + 1) * sizeof(int)); memcpy(A->i, tmp_i, (size_t) nnz * sizeof(int)); memcpy(A->x, tmp_x, (size_t) nnz * sizeof(double)); diff --git a/tests/test_helpers.h b/tests/test_helpers.h index fd47151..b3e96ce 100644 --- a/tests/test_helpers.h +++ b/tests/test_helpers.h @@ -2,7 +2,8 @@ #define TEST_HELPERS_H #include "expr.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" +#include "utils/matrix.h" /* Compare two double arrays directly * Returns 1 if all values match, 0 otherwise */ @@ -12,8 +13,17 @@ int cmp_double_array(const double *actual, const double *expected, int size); * Returns 1 if all values match, 0 otherwise */ int cmp_int_array(const int *actual, const int *expected, int size); -/* Create a random m x n CSR matrix with approximate nonzero density +/* Verify M has shape (m, *) with exactly nnz entries, and that M's CSR_matrix + * row 
pointers and column indices match exp_p (length m+1) and exp_i + * (length nnz). Returns 1 on full match, 0 otherwise. */ +int cmp_sparsity(matrix *M, const int *exp_p, const int *exp_i, int m, int nnz); + +/* Verify M has nnz entries and that its value array matches exp_x of + * length nnz. Returns 1 on full match, 0 otherwise. */ +int cmp_values(const matrix *M, const double *exp_x, int nnz); + +/* Create a random m x n CSR_matrix matrix with approximate nonzero density * in [0, 1]. Nonzero values are standard Gaussian (Box-Muller). */ -CSR_Matrix *new_csr_random(int m, int n, double density); +CSR_matrix *new_csr_random(int m, int n, double density); #endif /* TEST_HELPERS_H */ diff --git a/tests/utils/test_coo_matrix.h b/tests/utils/test_COO_matrix.h similarity index 83% rename from tests/utils/test_coo_matrix.h rename to tests/utils/test_COO_matrix.h index 53fa307..523b94a 100644 --- a/tests/utils/test_coo_matrix.h +++ b/tests/utils/test_COO_matrix.h @@ -4,16 +4,16 @@ #include "minunit.h" #include "test_helpers.h" -#include "utils/COO_Matrix.h" +#include "utils/COO_matrix.h" const char *test_csr_to_coo(void) { - /* Create a 3x3 CSR matrix A: + /* Create a 3x3 CSR_matrix matrix A: * [1.0 2.0 0.0] * [0.0 3.0 4.0] * [5.0 0.0 6.0] */ - CSR_Matrix *A = new_csr_matrix(3, 3, 6); + CSR_matrix *A = new_CSR_matrix(3, 3, 6); double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; int Ai[6] = {0, 1, 1, 2, 0, 2}; int Ap[4] = {0, 2, 4, 6}; @@ -21,7 +21,7 @@ const char *test_csr_to_coo(void) memcpy(A->i, Ai, 6 * sizeof(int)); memcpy(A->p, Ap, 4 * sizeof(int)); - COO_Matrix *coo = new_coo_matrix(A); + COO_matrix *coo = new_COO_matrix(A); mu_assert("m incorrect", coo->m == 3); mu_assert("n incorrect", coo->n == 3); @@ -35,8 +35,8 @@ const char *test_csr_to_coo(void) mu_assert("cols incorrect", cmp_int_array(coo->cols, expected_cols, 6)); mu_assert("vals incorrect", cmp_double_array(coo->x, expected_x, 6)); - free_coo_matrix(coo); - free_csr_matrix(A); + free_COO_matrix(coo); + 
free_CSR_matrix(A); return 0; } @@ -48,7 +48,7 @@ const char *test_csr_to_coo_lower_triangular(void) * [2 5 6] * [3 6 9] */ - CSR_Matrix *A = new_csr_matrix(3, 3, 9); + CSR_matrix *A = new_CSR_matrix(3, 3, 9); int Ap[4] = {0, 3, 6, 9}; int Ai[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; double Ax[9] = {1, 2, 3, 2, 5, 6, 3, 6, 9}; @@ -56,7 +56,7 @@ const char *test_csr_to_coo_lower_triangular(void) memcpy(A->i, Ai, 9 * sizeof(int)); memcpy(A->x, Ax, 9 * sizeof(double)); - COO_Matrix *coo = new_coo_matrix_lower_triangular(A); + COO_matrix *coo = new_COO_matrix_lower_triangular(A); mu_assert("ltri m incorrect", coo->m == 3); mu_assert("ltri n incorrect", coo->n == 3); @@ -73,15 +73,15 @@ const char *test_csr_to_coo_lower_triangular(void) mu_assert("ltri value_map incorrect", cmp_int_array(coo->value_map, expected_map, 6)); - free_coo_matrix(coo); - free_csr_matrix(A); + free_COO_matrix(coo); + free_CSR_matrix(A); return 0; } const char *test_refresh_lower_triangular_coo(void) { - CSR_Matrix *A = new_csr_matrix(3, 3, 9); + CSR_matrix *A = new_CSR_matrix(3, 3, 9); int Ap[4] = {0, 3, 6, 9}; int Ai[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; double Ax[9] = {1, 2, 3, 2, 5, 6, 3, 6, 9}; @@ -89,7 +89,7 @@ const char *test_refresh_lower_triangular_coo(void) memcpy(A->i, Ai, 9 * sizeof(int)); memcpy(A->x, Ax, 9 * sizeof(double)); - COO_Matrix *coo = new_coo_matrix_lower_triangular(A); + COO_matrix *coo = new_COO_matrix_lower_triangular(A); double vals2[9] = {10, 20, 30, 20, 50, 60, 30, 60, 90}; refresh_lower_triangular_coo(coo, vals2); @@ -97,8 +97,8 @@ const char *test_refresh_lower_triangular_coo(void) double expected_x[6] = {10, 20, 50, 30, 60, 90}; mu_assert("refresh vals incorrect", cmp_double_array(coo->x, expected_x, 6)); - free_coo_matrix(coo); - free_csr_matrix(A); + free_COO_matrix(coo); + free_CSR_matrix(A); return 0; } diff --git a/tests/utils/test_csc_matrix.h b/tests/utils/test_csc_matrix.h index 5459021..19d0e36 100644 --- a/tests/utils/test_csc_matrix.h +++ 
b/tests/utils/test_csc_matrix.h @@ -5,7 +5,7 @@ #include "minunit.h" #include "test_helpers.h" -#include "utils/CSC_Matrix.h" +#include "utils/CSC_matrix.h" /* Test ATA_alloc with a simple 3x3 example * A is 4x3 (4 rows, 3 columns): @@ -21,14 +21,14 @@ */ const char *test_ATA_alloc_simple(void) { - CSC_Matrix *A = new_csc_matrix(4, 3, 6); + CSC_matrix *A = new_CSC_matrix(4, 3, 6); int Ap[4] = {0, 2, 3, 6}; int Ai[5] = {0, 2, 1, 2, 1}; memcpy(A->p, Ap, 4 * sizeof(int)); memcpy(A->i, Ai, 5 * sizeof(int)); /* Compute C = A^T A */ - CSR_Matrix *C = ATA_alloc(A); + CSR_matrix *C = ATA_alloc(A); int expected_p[4] = {0, 2, 3, 5}; int expected_i[5] = {0, 2, 1, 0, 2}; @@ -36,8 +36,8 @@ const char *test_ATA_alloc_simple(void) mu_assert("i incorrect", cmp_int_array(C->i, expected_i, C->nnz)); mu_assert("nnz incorrect", C->nnz == 5); - free_csr_matrix(C); - free_csc_matrix(A); + free_CSR_matrix(C); + free_CSC_matrix(A); return 0; } @@ -57,13 +57,13 @@ const char *test_ATA_alloc_simple(void) */ const char *test_ATA_alloc_diagonal_like(void) { - /* Create A in CSC format (3 rows, 4 cols, 4 nonzeros) */ - CSC_Matrix *A = new_csc_matrix(3, 4, 4); + /* Create A in CSC_matrix format (3 rows, 4 cols, 4 nonzeros) */ + CSC_matrix *A = new_CSC_matrix(3, 4, 4); int Ap[5] = {0, 1, 2, 3, 4}; int Ai[4] = {0, 1, 2, 0}; memcpy(A->p, Ap, 5 * sizeof(int)); memcpy(A->i, Ai, 4 * sizeof(int)); - CSR_Matrix *C = ATA_alloc(A); + CSR_matrix *C = ATA_alloc(A); int expected_p[5] = {0, 2, 3, 4, 6}; int expected_i[6] = {0, 3, 1, 2, 0, 3}; @@ -72,23 +72,23 @@ const char *test_ATA_alloc_diagonal_like(void) mu_assert("i incorrect", cmp_int_array(C->i, expected_i, C->nnz)); mu_assert("nnz incorrect", C->nnz == 6); - free_csr_matrix(C); - free_csc_matrix(A); + free_CSR_matrix(C); + free_CSC_matrix(A); return 0; } const char *test_ATA_alloc_random(void) { - /* Create A in CSC format */ - CSC_Matrix *A = new_csc_matrix(10, 15, 15); + /* Create A in CSC_matrix format */ + CSC_matrix *A = new_CSC_matrix(10, 15, 
15); int Ap[16] = {0, 1, 1, 1, 1, 4, 5, 6, 7, 8, 9, 11, 11, 11, 13, 15}; int Ai[15] = {5, 0, 6, 9, 0, 5, 1, 3, 6, 0, 6, 3, 6, 6, 8}; double Ax[15] = {7, 4, 8, 5, 7, 3, 7, 8, 5, 4, 8, 8, 3, 6, 5}; memcpy(A->p, Ap, 16 * sizeof(int)); memcpy(A->i, Ai, 15 * sizeof(int)); memcpy(A->x, Ax, 15 * sizeof(double)); - CSR_Matrix *C = ATA_alloc(A); + CSR_matrix *C = ATA_alloc(A); int expected_p[16] = {0, 2, 2, 2, 2, 8, 11, 13, 14, 16, 21, 27, 27, 27, 33, 38}; int expected_i[38] = {0, 6, 4, 5, 9, 10, 13, 14, 4, 5, 10, 0, 6, @@ -109,18 +109,18 @@ const char *test_ATA_alloc_random(void) 288., 144., 128., 90., 144., 182., 108., 288., 180., 288., 108., 241.}; mu_assert("x incorrect", cmp_double_array(C->x, Cx_correct, C->nnz)); - free_csr_matrix(C); - free_csc_matrix(A); + free_CSR_matrix(C); + free_CSC_matrix(A); return 0; } const char *test_ATA_alloc_random2(void) { - /* Create A in CSC format */ + /* Create A in CSC_matrix format */ int m = 15; int n = 10; - CSC_Matrix *A = new_csc_matrix(m, n, 15); + CSC_matrix *A = new_CSC_matrix(m, n, 15); int Ap[11] = {0, 2, 4, 6, 6, 9, 12, 12, 14, 14, 15}; int Ai[15] = {9, 12, 3, 4, 1, 6, 4, 8, 13, 1, 3, 7, 5, 13, 6}; double Ax[15] = {0.99, 0.9, 0.51, 0.64, 0.39, 0.29, 0.26, 0.91, @@ -128,7 +128,7 @@ const char *test_ATA_alloc_random2(void) memcpy(A->p, Ap, 11 * sizeof(int)); memcpy(A->i, Ai, 15 * sizeof(int)); memcpy(A->x, Ax, 15 * sizeof(double)); - CSR_Matrix *C = ATA_alloc(A); + CSR_matrix *C = ATA_alloc(A); int expected_p[11] = {0, 1, 4, 7, 7, 10, 13, 13, 15, 15, 17}; int expected_i[17] = {0, 1, 4, 5, 2, 5, 9, 1, 4, 7, 1, 2, 5, 4, 7, 2, 9}; @@ -147,14 +147,14 @@ const char *test_ATA_alloc_random2(void) 0.032857, 0.116699}; mu_assert("x incorrect", cmp_double_array(C->x, Cx_correct, C->nnz)); - free_csr_matrix(C); - free_csc_matrix(A); + free_CSR_matrix(C); + free_CSC_matrix(A); return 0; } const char *test_BTA_alloc_and_BTDA_fill(void) { - /* Create A: 4x3 CSC matrix + /* Create A: 4x3 CSC_matrix matrix * [1.0 0.0 2.0] * [0.0 3.0 0.0] 
* [4.0 0.0 5.0] @@ -162,7 +162,7 @@ const char *test_BTA_alloc_and_BTDA_fill(void) */ int m = 4; int n = 3; - CSC_Matrix *A = new_csc_matrix(m, n, 6); + CSC_matrix *A = new_CSC_matrix(m, n, 6); int Ap_A[4] = {0, 2, 4, 6}; int Ai_A[6] = {0, 2, 1, 3, 0, 2}; double Ax_A[6] = {1.0, 4.0, 3.0, 6.0, 2.0, 5.0}; @@ -170,14 +170,14 @@ const char *test_BTA_alloc_and_BTDA_fill(void) memcpy(A->i, Ai_A, 6 * sizeof(int)); memcpy(A->x, Ax_A, 6 * sizeof(double)); - /* Create B: 4x2 CSC matrix + /* Create B: 4x2 CSC_matrix matrix * [1.0 0.0] * [0.0 2.0] * [3.0 0.0] * [0.0 4.0] */ int p = 2; - CSC_Matrix *B = new_csc_matrix(m, p, 4); + CSC_matrix *B = new_CSC_matrix(m, p, 4); int Bp[3] = {0, 2, 4}; int Bi[4] = {0, 2, 1, 3}; double Bx[4] = {1.0, 3.0, 2.0, 4.0}; @@ -186,7 +186,7 @@ const char *test_BTA_alloc_and_BTDA_fill(void) memcpy(B->x, Bx, 4 * sizeof(double)); /* Allocate C = B^T A (should be 2x3) */ - CSR_Matrix *C = BTA_alloc(A, B); + CSR_matrix *C = BTA_alloc(A, B); /* Sparsity pattern check before filling values */ int expected_p[3] = {0, 2, 3}; @@ -203,9 +203,9 @@ const char *test_BTA_alloc_and_BTDA_fill(void) double expected_x[3] = {37.0, 47.0, 108.0}; mu_assert("C values incorrect", cmp_double_array(C->x, expected_x, 3)); - free_csr_matrix(C); - free_csc_matrix(A); - free_csc_matrix(B); + free_CSR_matrix(C); + free_CSC_matrix(A); + free_CSC_matrix(B); return 0; } diff --git a/tests/utils/test_csr_csc_conversion.h b/tests/utils/test_csr_csc_conversion.h index efbdc9e..bb8189c 100644 --- a/tests/utils/test_csr_csc_conversion.h +++ b/tests/utils/test_csr_csc_conversion.h @@ -5,19 +5,19 @@ #include "minunit.h" #include "test_helpers.h" -#include "utils/CSC_Matrix.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" -/* Test CSR to CSC conversion with fill_sparsity and fill_values */ +/* Test CSR_matrix to CSC_matrix conversion with fill_sparsity and fill_values */ const char *test_csr_to_csc_split(void) { - /* Create a 4x5 CSR matrix 
A: + /* Create a 4x5 CSR_matrix matrix A: * [1.0 0.0 0.0 0.0 1.0] * [0.0 0.0 3.0 0.0 0.0] * [0.0 2.0 0.0 0.0 0.0] * [0.0 0.0 0.0 4.0 0.0] */ - CSR_Matrix *A = new_csr_matrix(4, 5, 5); + CSR_matrix *A = new_CSR_matrix(4, 5, 5); double Ax[5] = {1.0, 1.0, 3.0, 2.0, 4.0}; int Ai[5] = {0, 4, 2, 1, 3}; int Ap[5] = {0, 2, 3, 4, 5}; @@ -29,7 +29,7 @@ const char *test_csr_to_csc_split(void) int *iwork = (int *) malloc(A->n * sizeof(int)); /* First, fill sparsity pattern */ - CSC_Matrix *C = csr_to_csc_alloc(A, iwork); + CSC_matrix *C = csr_to_csc_alloc(A, iwork); /* Check sparsity pattern */ int Cp_correct[6] = {0, 1, 2, 3, 4, 5}; @@ -47,22 +47,22 @@ const char *test_csr_to_csc_split(void) mu_assert("C vals incorrect", cmp_double_array(C->x, Cx_correct, 5)); free(iwork); - free_csr_matrix(A); - free_csc_matrix(C); + free_CSR_matrix(A); + free_CSC_matrix(C); return 0; } -/* Test CSC to CSR conversion with fill_sparsity */ +/* Test CSC_matrix to CSR_matrix conversion with fill_sparsity */ const char *test_csc_to_csr_sparsity(void) { - /* Create a 4x5 CSC matrix A: + /* Create a 4x5 CSC_matrix matrix A: * [1.0 0.0 0.0 0.0 2.0] * [0.0 0.0 3.0 0.0 0.0] * [0.0 4.0 0.0 0.0 0.0] * [0.0 0.0 0.0 5.0 0.0] */ - CSC_Matrix *A = new_csc_matrix(4, 5, 5); + CSC_matrix *A = new_CSC_matrix(4, 5, 5); double Ax[5] = {1.0, 4.0, 3.0, 5.0, 2.0}; int Ai[5] = {0, 2, 1, 3, 0}; int Ap[6] = {0, 1, 2, 3, 4, 5}; @@ -74,9 +74,9 @@ const char *test_csc_to_csr_sparsity(void) int *iwork = (int *) malloc(A->m * sizeof(int)); /* Fill sparsity pattern */ - CSR_Matrix *C = csc_to_csr_alloc(A, iwork); + CSR_matrix *C = csc_to_csr_alloc(A, iwork); - /* Expected CSR format: + /* Expected CSR_matrix format: * Row 0: [1.0 at col 0, 2.0 at col 4] * Row 1: [3.0 at col 2] * Row 2: [4.0 at col 1] @@ -91,17 +91,17 @@ const char *test_csc_to_csr_sparsity(void) mu_assert("C nnz incorrect", C->nnz == 5); free(iwork); - free_csc_matrix(A); - free_csr_matrix(C); + free_CSC_matrix(A); + free_CSR_matrix(C); return 0; } -/* Test 
CSC to CSR conversion with fill_values */ +/* Test CSC_matrix to CSR_matrix conversion with fill_values */ const char *test_csc_to_csr_values(void) { - /* Create a 4x5 CSC matrix A */ - CSC_Matrix *A = new_csc_matrix(4, 5, 5); + /* Create a 4x5 CSC_matrix matrix A */ + CSC_matrix *A = new_CSC_matrix(4, 5, 5); double Ax[5] = {1.0, 4.0, 3.0, 5.0, 2.0}; int Ai[5] = {0, 2, 1, 3, 0}; int Ap[6] = {0, 1, 2, 3, 4, 5}; @@ -113,7 +113,7 @@ const char *test_csc_to_csr_values(void) int *iwork = (int *) malloc(A->m * sizeof(int)); /* Fill sparsity pattern */ - CSR_Matrix *C = csc_to_csr_alloc(A, iwork); + CSR_matrix *C = csc_to_csr_alloc(A, iwork); /* Fill values */ csc_to_csr_fill_values(A, C, iwork); @@ -124,21 +124,21 @@ const char *test_csc_to_csr_values(void) mu_assert("C vals incorrect", cmp_double_array(C->x, Cx_correct, 5)); free(iwork); - free_csc_matrix(A); - free_csr_matrix(C); + free_CSC_matrix(A); + free_CSR_matrix(C); return 0; } -/* Test round-trip conversion: CSR -> CSC -> CSR */ +/* Test round-trip conversion: CSR_matrix -> CSC_matrix -> CSR_matrix */ const char *test_csr_csc_csr_roundtrip(void) { - /* Create a 3x4 CSR matrix A: + /* Create a 3x4 CSR_matrix matrix A: * [1.0 2.0 0.0 3.0] * [0.0 4.0 5.0 0.0] * [6.0 0.0 7.0 8.0] */ - CSR_Matrix *A = new_csr_matrix(3, 4, 8); + CSR_matrix *A = new_CSR_matrix(3, 4, 8); double Ax[8] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0}; int Ai[8] = {0, 1, 3, 1, 2, 0, 2, 3}; int Ap[4] = {0, 3, 5, 8}; @@ -146,14 +146,14 @@ const char *test_csr_csc_csr_roundtrip(void) memcpy(A->i, Ai, 8 * sizeof(int)); memcpy(A->p, Ap, 4 * sizeof(int)); - /* Convert CSR to CSC */ + /* Convert CSR_matrix to CSC_matrix */ int *iwork_csc = (int *) malloc(A->n * sizeof(int)); - CSC_Matrix *B = csr_to_csc_alloc(A, iwork_csc); + CSC_matrix *B = csr_to_csc_alloc(A, iwork_csc); csr_to_csc_fill_values(A, B, iwork_csc); - /* Convert CSC back to CSR */ + /* Convert CSC_matrix back to CSR_matrix */ int *iwork_csr = (int *) malloc(B->m * sizeof(int)); - 
CSR_Matrix *C = csc_to_csr_alloc(B, iwork_csr); + CSR_matrix *C = csc_to_csr_alloc(B, iwork_csr); csc_to_csr_fill_values(B, C, iwork_csr); /* C should match A */ @@ -163,9 +163,9 @@ const char *test_csr_csc_csr_roundtrip(void) free(iwork_csc); free(iwork_csr); - free_csr_matrix(A); - free_csc_matrix(B); - free_csr_matrix(C); + free_CSR_matrix(A); + free_CSC_matrix(B); + free_CSR_matrix(C); return 0; } diff --git a/tests/utils/test_csr_matrix.h b/tests/utils/test_csr_matrix.h index 68e0128..33c11a0 100644 --- a/tests/utils/test_csr_matrix.h +++ b/tests/utils/test_csr_matrix.h @@ -6,18 +6,18 @@ #include "old-code/old_CSR.h" #include "old-code/old_CSR_sum.h" #include "test_helpers.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSR_matrix.h" #include "utils/CSR_sum.h" #include "utils/int_double_pair.h" const char *test_diag_csr_mult(void) { - /* Create a 3x3 CSR matrix A: + /* Create a 3x3 CSR_matrix matrix A: * [1.0 2.0 0.0] * [0.0 3.0 4.0] * [5.0 0.0 6.0] */ - CSR_Matrix *A = new_csr_matrix(3, 3, 6); + CSR_matrix *A = new_CSR_matrix(3, 3, 6); double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; int Ai[6] = {0, 1, 1, 2, 0, 2}; int Ap[4] = {0, 2, 4, 6}; @@ -31,7 +31,7 @@ const char *test_diag_csr_mult(void) * [0.0 9.0 12.0] * [2.5 0.0 3.0] */ - CSR_Matrix *C = new_csr_matrix(3, 3, 6); + CSR_matrix *C = new_CSR_matrix(3, 3, 6); diag_csr_mult(d, A, C); double Ax_correct[6] = {2.0, 4.0, 9.0, 12.0, 2.5, 3.0}; @@ -42,8 +42,8 @@ const char *test_diag_csr_mult(void) mu_assert("cols incorrect", cmp_int_array(C->i, Ai_correct, 6)); mu_assert("rows incorrect", cmp_int_array(C->p, Ap_correct, 4)); - free_csr_matrix(A); - free_csr_matrix(C); + free_CSR_matrix(A); + free_CSR_matrix(C); return 0; } @@ -55,7 +55,7 @@ const char *test_diag_csr_mult(void) */ const char *test_csr_sum(void) { - CSR_Matrix *A = new_csr_matrix(3, 3, 5); + CSR_matrix *A = new_CSR_matrix(3, 3, 5); double Ax[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; int Ai[5] = {0, 2, 1, 0, 2}; int Ap[4] = {0, 2, 3, 5}; @@ -63,7 +63,7 @@ 
const char *test_csr_sum(void) memcpy(A->i, Ai, 5 * sizeof(int)); memcpy(A->p, Ap, 4 * sizeof(int)); - CSR_Matrix *B = new_csr_matrix(3, 3, 4); + CSR_matrix *B = new_CSR_matrix(3, 3, 4); double Bx[4] = {1.0, 2.0, 3.0, 6.0}; int Bi[4] = {1, 0, 2, 1}; int Bp[4] = {0, 1, 3, 4}; @@ -71,7 +71,7 @@ const char *test_csr_sum(void) memcpy(B->i, Bi, 4 * sizeof(int)); memcpy(B->p, Bp, 4 * sizeof(int)); - CSR_Matrix *C = new_csr_matrix(3, 3, 9); + CSR_matrix *C = new_CSR_matrix(3, 3, 9); sum_csr_matrices(A, B, C); double Cx_correct[9] = {1.0, 1.0, 2.0, 2.0, 3.0, 3.0, 4.0, 6.0, 5.0}; @@ -83,9 +83,9 @@ const char *test_csr_sum(void) mu_assert("C cols incorrect", cmp_int_array(C->i, Ci_correct, 9)); mu_assert("C rows incorrect", cmp_int_array(C->p, Cp_correct, 4)); - free_csr_matrix(A); - free_csr_matrix(B); - free_csr_matrix(C); + free_CSR_matrix(A); + free_CSR_matrix(B); + free_CSR_matrix(C); return 0; } @@ -97,7 +97,7 @@ const char *test_csr_sum(void) */ const char *test_csr_sum2(void) { - CSR_Matrix *A = new_csr_matrix(3, 3, 5); + CSR_matrix *A = new_CSR_matrix(3, 3, 5); double Ax[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; int Ai[5] = {0, 2, 2, 0, 2}; int Ap[4] = {0, 2, 3, 5}; @@ -105,7 +105,7 @@ const char *test_csr_sum2(void) memcpy(A->i, Ai, 5 * sizeof(int)); memcpy(A->p, Ap, 4 * sizeof(int)); - CSR_Matrix *B = new_csr_matrix(3, 3, 4); + CSR_matrix *B = new_CSR_matrix(3, 3, 4); double Bx[4] = {1.0, 2.0, 3.0, 6.0}; int Bi[4] = {1, 0, 2, 1}; int Bp[4] = {0, 1, 3, 4}; @@ -113,7 +113,7 @@ const char *test_csr_sum2(void) memcpy(B->i, Bi, 4 * sizeof(int)); memcpy(B->p, Bp, 4 * sizeof(int)); - CSR_Matrix *C = new_csr_matrix(3, 3, 8); + CSR_matrix *C = new_CSR_matrix(3, 3, 8); sum_csr_matrices(A, B, C); double Cx_correct[8] = {1, 1, 2, 2, 6, 4, 6, 5}; @@ -125,16 +125,16 @@ const char *test_csr_sum2(void) mu_assert("C cols incorrect", cmp_int_array(C->i, Ci_correct, 8)); mu_assert("C rows incorrect", cmp_int_array(C->p, Cp_correct, 4)); - free_csr_matrix(A); - free_csr_matrix(B); - 
free_csr_matrix(C); + free_CSR_matrix(A); + free_CSR_matrix(B); + free_CSR_matrix(C); return 0; } const char *test_transpose(void) { - CSR_Matrix *A = new_csr_matrix(4, 5, 5); + CSR_matrix *A = new_CSR_matrix(4, 5, 5); double Ax[5] = {1.0, 1.0, 3.0, 2.0, 4.0}; int Ai[5] = {0, 4, 1, 0, 1}; int Ap[5] = {0, 2, 3, 4, 5}; @@ -143,7 +143,7 @@ const char *test_transpose(void) memcpy(A->p, Ap, 5 * sizeof(int)); int iwork[5]; - CSR_Matrix *AT = transpose(A, iwork); + CSR_matrix *AT = transpose(A, iwork); double ATx_correct[5] = {1.0, 2.0, 3.0, 4.0, 1.0}; int ATi_correct[5] = {0, 2, 1, 3, 0}; int ATp_correct[6] = {0, 2, 4, 4, 4, 5}; @@ -151,8 +151,8 @@ const char *test_transpose(void) mu_assert("AT cols incorrect", cmp_int_array(AT->i, ATi_correct, 5)); mu_assert("AT rows incorrect", cmp_int_array(AT->p, ATp_correct, 6)); - free_csr_matrix(A); - free_csr_matrix(AT); + free_CSR_matrix(A); + free_CSR_matrix(AT); return 0; } @@ -167,7 +167,7 @@ A = [1 0 0 0 1 */ const char *test_csr_vecmat_values_sparse(void) { - CSR_Matrix *A = new_csr_matrix(4, 5, 5); + CSR_matrix *A = new_CSR_matrix(4, 5, 5); double Ax[5] = {1.0, 1.0, 3.0, 2.0, 4.0}; int Ai[5] = {0, 4, 1, 0, 1}; int Ap[5] = {0, 2, 3, 4, 5}; @@ -177,7 +177,7 @@ const char *test_csr_vecmat_values_sparse(void) double z[4] = {1.0, 2.0, 3.0, 4.0}; - CSR_Matrix *C = new_csr_matrix(1, 3, 3); + CSR_matrix *C = new_CSR_matrix(1, 3, 3); double Cx[3] = {0.0, 0.0, 0.0}; int Ci[3] = {0, 1, 4}; int Cp[2] = {0, 3}; @@ -187,7 +187,7 @@ const char *test_csr_vecmat_values_sparse(void) int iwork[5]; - CSR_Matrix *AT = transpose(A, iwork); + CSR_matrix *AT = transpose(A, iwork); Ax_csr_fill_values(AT, z, C); @@ -196,15 +196,15 @@ const char *test_csr_vecmat_values_sparse(void) mu_assert("C nnz incorrect", C->nnz == 3); mu_assert("C vals incorrect", cmp_double_array(C->x, Cx_correct, 3)); - free_csr_matrix(A); - free_csr_matrix(AT); - free_csr_matrix(C); + free_CSR_matrix(A); + free_CSR_matrix(AT); + free_CSR_matrix(C); return 0; } const char 
*test_sum_all_rows_csr(void) { - /* Create a 3x4 CSR matrix A: + /* Create a 3x4 CSR_matrix matrix A: * [1.0 2.0 0.0 0.0] * [0.0 3.0 4.0 0.0] * [5.0 0.0 6.0 7.0] @@ -212,14 +212,14 @@ const char *test_sum_all_rows_csr(void) * Sum all rows should give: * [6.0 5.0 10.0 7.0] */ - CSR_Matrix *A = new_csr_matrix(3, 4, 7); + CSR_matrix *A = new_CSR_matrix(3, 4, 7); double Ax[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; int Ai[7] = {0, 1, 1, 2, 0, 2, 3}; int Ap[4] = {0, 2, 4, 7}; memcpy(A->x, Ax, 7 * sizeof(double)); memcpy(A->i, Ai, 7 * sizeof(int)); memcpy(A->p, Ap, 4 * sizeof(int)); - CSR_Matrix *C = new_csr_matrix(1, 4, 4); + CSR_matrix *C = new_CSR_matrix(1, 4, 4); int_double_pair *pairs = new_int_double_pair_array(7); sum_all_rows_csr(A, C, pairs); double Cx_correct[4] = {6.0, 5.0, 10.0, 7.0}; @@ -231,15 +231,15 @@ const char *test_sum_all_rows_csr(void) mu_assert("C cols incorrect", cmp_int_array(C->i, Ci_correct, 4)); mu_assert("C rows incorrect", cmp_int_array(C->p, Cp_correct, 2)); - free_csr_matrix(A); - free_csr_matrix(C); + free_CSR_matrix(A); + free_CSR_matrix(C); free_int_double_pair_array(pairs); return 0; } const char *test_sum_block_of_rows_csr(void) { - /* Create a 9x4 CSR matrix A and sum blocks of size 3 + /* Create a 9x4 CSR_matrix matrix A and sum blocks of size 3 * Block 0 (rows 0-2): * [1.0 2.0 0.0 0.0] * [0.0 3.0 1.0 0.0] @@ -260,7 +260,7 @@ const char *test_sum_block_of_rows_csr(void) * * Result C should be 3x4 matrix with the sums above */ - CSR_Matrix *A = new_csr_matrix(9, 4, 18); + CSR_matrix *A = new_CSR_matrix(9, 4, 18); double Ax[18] = {1.0, 2.0, /* row 0 */ 3.0, 1.0, /* row 1 */ @@ -289,7 +289,7 @@ const char *test_sum_block_of_rows_csr(void) memcpy(A->p, Ap, 10 * sizeof(int)); /* Allocate C for 3 blocks and enough space for all nonzeros */ - CSR_Matrix *C = new_csr_matrix(3, 4, 12); + CSR_matrix *C = new_CSR_matrix(3, 4, 12); int_double_pair *pairs = new_int_double_pair_array(18); sum_block_of_rows_csr(A, C, pairs, 3); @@ -310,19 +310,20 
@@ const char *test_sum_block_of_rows_csr(void) mu_assert("C cols incorrect", cmp_int_array(C->i, Ci_correct, 12)); mu_assert("C rows incorrect", cmp_int_array(C->p, Cp_correct, 4)); - free_csr_matrix(A); - free_csr_matrix(C); + free_CSR_matrix(A); + free_CSR_matrix(C); free_int_double_pair_array(pairs); return 0; } const char *test_sum_evenly_spaced_rows_csr(void) { - /* Create a 9x4 CSR matrix A (same as test_sum_block_of_rows_csr) and sum evenly + /* Create a 9x4 CSR_matrix matrix A (same as test_sum_block_of_rows_csr) and sum + evenly * spaced rows With row_spacing=3: - A = 9x4 CSR matrix: + A = 9x4 CSR_matrix matrix: 1 2 0 0 0 3 1 0 0 0 4 5 @@ -338,7 +339,7 @@ const char *test_sum_evenly_spaced_rows_csr(void) row 1: sum of rows 1, 4, 7 = [1 4 6 0] row 2: sum of rows 2, 5, 8 = [3 2 4 11] */ - CSR_Matrix *A = new_csr_matrix(9, 4, 18); + CSR_matrix *A = new_CSR_matrix(9, 4, 18); double Ax[18] = {1.0, 2.0, /* row 0 */ 3.0, 1.0, /* row 1 */ @@ -367,7 +368,7 @@ const char *test_sum_evenly_spaced_rows_csr(void) memcpy(A->p, Ap, 10 * sizeof(int)); /* Allocate C for 3 rows (row_spacing=3) and enough space for all nonzeros */ - CSR_Matrix *C = new_csr_matrix(3, 4, 10); + CSR_matrix *C = new_CSR_matrix(3, 4, 10); int_double_pair *pairs = new_int_double_pair_array(18); sum_evenly_spaced_rows_csr(A, C, pairs, 3); @@ -388,20 +389,20 @@ const char *test_sum_evenly_spaced_rows_csr(void) mu_assert("C cols incorrect", cmp_int_array(C->i, Ci_correct, 10)); mu_assert("C rows incorrect", cmp_int_array(C->p, Cp_correct, 4)); - free_csr_matrix(A); - free_csr_matrix(C); + free_CSR_matrix(A); + free_CSR_matrix(C); free_int_double_pair_array(pairs); return 0; } const char *test_AT_alloc_and_fill(void) { - /* Create a 3x4 CSR matrix A: + /* Create a 3x4 CSR_matrix matrix A: * [1.0 0.0 2.0 0.0] * [0.0 3.0 0.0 4.0] * [5.0 0.0 6.0 0.0] */ - CSR_Matrix *A = new_csr_matrix(3, 4, 6); + CSR_matrix *A = new_CSR_matrix(3, 4, 6); double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; int Ai[6] = {0, 2, 
1, 3, 0, 2}; int Ap[4] = {0, 2, 4, 6}; @@ -411,7 +412,7 @@ const char *test_AT_alloc_and_fill(void) /* Allocate A^T (should be 4x3) */ int *iwork = (int *) malloc(A->n * sizeof(int)); - CSR_Matrix *AT = AT_alloc(A, iwork); + CSR_matrix *AT = AT_alloc(A, iwork); /* Fill values of A^T */ AT_fill_values(A, AT, iwork); @@ -432,8 +433,8 @@ const char *test_AT_alloc_and_fill(void) mu_assert("AT cols incorrect", cmp_int_array(AT->i, ATi_correct, 6)); mu_assert("AT rows incorrect", cmp_int_array(AT->p, ATp_correct, 5)); - free_csr_matrix(A); - free_csr_matrix(AT); + free_CSR_matrix(A); + free_CSR_matrix(AT); free(iwork); return 0; @@ -454,7 +455,7 @@ const char *test_kron_identity_csr(void) * [3 0 | 0 0 | 4 0] * [0 3 | 0 0 | 0 4] */ - CSR_Matrix *A = new_csr_matrix(2, 3, 4); + CSR_matrix *A = new_CSR_matrix(2, 3, 4); double Ax[4] = {1.0, 2.0, 3.0, 4.0}; int Ai[4] = {0, 2, 0, 2}; int Ap[3] = {0, 2, 4}; @@ -462,7 +463,7 @@ const char *test_kron_identity_csr(void) memcpy(A->i, Ai, 4 * sizeof(int)); memcpy(A->p, Ap, 3 * sizeof(int)); - CSR_Matrix *result = kron_identity_csr(A, 2); + CSR_matrix *result = kron_identity_csr(A, 2); /* Expected: 4x6 with 8 nonzeros * Row 0: [1, 0, 0, 0, 2, 0] -> cols {0, 4}, vals {1, 2} @@ -480,8 +481,8 @@ const char *test_kron_identity_csr(void) mu_assert("cols incorrect", cmp_int_array(result->i, expected_i, 8)); mu_assert("rows incorrect", cmp_int_array(result->p, expected_p, 5)); - free_csr_matrix(A); - free_csr_matrix(result); + free_CSR_matrix(A); + free_CSR_matrix(result); return 0; } diff --git a/tests/utils/test_linalg_sparse_matmuls.h b/tests/utils/test_linalg_sparse_matmuls.h index 120f99b..95c1c27 100644 --- a/tests/utils/test_linalg_sparse_matmuls.h +++ b/tests/utils/test_linalg_sparse_matmuls.h @@ -5,18 +5,18 @@ #include "minunit.h" #include "test_helpers.h" -#include "utils/CSC_Matrix.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" #include "utils/linalg_sparse_matmuls.h" /* Test 
block_left_multiply_fill_sparsity with simple case: single block */ const char *test_block_left_multiply_single_block(void) { - /* A is 2x3 CSR: + /* A is 2x3 CSR_matrix: * [1.0 0.0 0.0] * [0.0 1.0 1.0] */ - CSR_Matrix *A = new_csr_matrix(2, 3, 3); + CSR_matrix *A = new_CSR_matrix(2, 3, 3); double Ax[3] = {1.0, 1.0, 1.0}; int Ai[3] = {0, 1, 2}; int Ap[3] = {0, 1, 3}; @@ -24,12 +24,12 @@ const char *test_block_left_multiply_single_block(void) memcpy(A->i, Ai, 3 * sizeof(int)); memcpy(A->p, Ap, 3 * sizeof(int)); - /* J is 3x2 CSC (single block, so p=1): + /* J is 3x2 CSC_matrix (single block, so p=1): * [1.0 0.0] * [1.0 0.0] * [0.0 1.0] */ - CSC_Matrix *J = new_csc_matrix(3, 2, 3); + CSC_matrix *J = new_CSC_matrix(3, 2, 3); double Jx[3] = {1.0, 1.0, 1.0}; int Ji[3] = {0, 1, 2}; int Jp[3] = {0, 2, 3}; @@ -38,7 +38,7 @@ const char *test_block_left_multiply_single_block(void) memcpy(J->p, Jp, 3 * sizeof(int)); /* Compute C = A @ J1 (p=1 means just one block) */ - CSC_Matrix *C = block_left_multiply_fill_sparsity(A, J, 1); + CSC_matrix *C = block_left_multiply_fill_sparsity(A, J, 1); /* Expected C is 2x2: * C[0,0] = A[0,:] @ J[:,0] = 1.0 * 1.0 = 1.0 (row 0 has column 0, J col 0 has @@ -54,20 +54,20 @@ const char *test_block_left_multiply_single_block(void) mu_assert("C col pointers incorrect", cmp_int_array(C->p, expected_p, 3)); mu_assert("C row indices incorrect", cmp_int_array(C->i, expected_i, 3)); - free_csc_matrix(C); - free_csr_matrix(A); - free_csc_matrix(J); + free_CSC_matrix(C); + free_CSR_matrix(A); + free_CSC_matrix(J); return NULL; } /* Test block_left_multiply_fill_sparsity with two blocks */ const char *test_block_left_multiply_two_blocks(void) { - /* A is 2x2 CSR: + /* A is 2x2 CSR_matrix: * [1.0 0.0] * [0.0 1.0] */ - CSR_Matrix *A = new_csr_matrix(2, 2, 2); + CSR_matrix *A = new_CSR_matrix(2, 2, 2); double Ax[2] = {1.0, 1.0}; int Ai[2] = {0, 1}; int Ap[3] = {0, 1, 2}; @@ -75,7 +75,7 @@ const char *test_block_left_multiply_two_blocks(void) memcpy(A->i, 
Ai, 2 * sizeof(int)); memcpy(A->p, Ap, 3 * sizeof(int)); - /* J is 4x3 CSC (two blocks of 2 rows each): + /* J is 4x3 CSC_matrix (two blocks of 2 rows each): * Block 1 rows [0,1]: * [1.0 0.0 0.0] * [0.0 0.0 0.0] @@ -88,7 +88,7 @@ const char *test_block_left_multiply_two_blocks(void) * [0.0 1.0 0.0] * [0.0 0.0 1.0] */ - CSC_Matrix *J = new_csc_matrix(4, 3, 3); + CSC_matrix *J = new_CSC_matrix(4, 3, 3); double Jx[3] = {1.0, 1.0, 1.0}; int Ji[3] = {0, 2, 3}; int Jp[4] = {0, 1, 2, 3}; @@ -109,7 +109,7 @@ const char *test_block_left_multiply_two_blocks(void) * [0.0 0.0 0.0] * [0.0 1.0 1.0] */ - CSC_Matrix *C = block_left_multiply_fill_sparsity(A, J, 2); + CSC_matrix *C = block_left_multiply_fill_sparsity(A, J, 2); block_left_multiply_fill_values(A, J, C); int expected_p2[4] = {0, 1, 2, 3}; @@ -121,17 +121,17 @@ const char *test_block_left_multiply_two_blocks(void) mu_assert("C row indices incorrect", cmp_int_array(C->i, expected_i2, 3)); mu_assert("C values incorrect", cmp_double_array(C->x, expected_x2, 3)); - free_csc_matrix(C); - free_csr_matrix(A); - free_csc_matrix(J); + free_CSC_matrix(C); + free_CSR_matrix(A); + free_CSC_matrix(J); return NULL; } /* Test block_left_multiply_fill_sparsity with all zero column in J */ const char *test_block_left_multiply_zero_column(void) { - /* A is 2x2 CSR (identity) */ - CSR_Matrix *A = new_csr_matrix(2, 2, 2); + /* A is 2x2 CSR_matrix (identity) */ + CSR_matrix *A = new_CSR_matrix(2, 2, 2); double Ax[2] = {1.0, 1.0}; int Ai[2] = {0, 1}; int Ap[3] = {0, 1, 2}; @@ -143,7 +143,7 @@ const char *test_block_left_multiply_zero_column(void) * [1.0 0.0] * [0.0 0.0] */ - CSC_Matrix *J = new_csc_matrix(2, 2, 1); + CSC_matrix *J = new_CSC_matrix(2, 2, 1); double Jx[1] = {1.0}; int Ji[1] = {0}; int Jp[3] = {0, 1, 1}; /* Column 0 has one nonzero, column 1 is empty */ @@ -151,7 +151,7 @@ const char *test_block_left_multiply_zero_column(void) memcpy(J->i, Ji, 1 * sizeof(int)); memcpy(J->p, Jp, 3 * sizeof(int)); - CSC_Matrix *C = 
block_left_multiply_fill_sparsity(A, J, 1); + CSC_matrix *C = block_left_multiply_fill_sparsity(A, J, 1); int expected_p3[3] = {0, 1, 1}; int expected_i3[1] = {0}; @@ -160,21 +160,21 @@ const char *test_block_left_multiply_zero_column(void) mu_assert("C col pointers incorrect", cmp_int_array(C->p, expected_p3, 3)); mu_assert("C row indices incorrect", cmp_int_array(C->i, expected_i3, 1)); - free_csc_matrix(C); - free_csr_matrix(A); - free_csc_matrix(J); + free_CSC_matrix(C); + free_CSR_matrix(A); + free_CSC_matrix(J); return NULL; } -/* Test csr_csc_matmul_alloc: C = A @ B where A is CSR and B is CSC */ +/* Test csr_csc_matmul_alloc: C = A @ B where A is CSR_matrix and B is CSC_matrix */ const char *test_csr_csc_matmul_alloc_basic(void) { - /* A is 3x2 CSR: + /* A is 3x2 CSR_matrix: * [1.0 0.0] * [0.0 1.0] * [1.0 1.0] */ - CSR_Matrix *A = new_csr_matrix(3, 2, 4); + CSR_matrix *A = new_CSR_matrix(3, 2, 4); double Ax[4] = {1.0, 1.0, 1.0, 1.0}; int Ai[4] = {0, 1, 0, 1}; int Ap[4] = {0, 1, 2, 4}; @@ -182,11 +182,11 @@ const char *test_csr_csc_matmul_alloc_basic(void) memcpy(A->i, Ai, 4 * sizeof(int)); memcpy(A->p, Ap, 4 * sizeof(int)); - /* B is 2x3 CSC: + /* B is 2x3 CSC_matrix: * [1.0 0.0 1.0] * [0.0 1.0 1.0] */ - CSC_Matrix *B = new_csc_matrix(2, 3, 4); + CSC_matrix *B = new_CSC_matrix(2, 3, 4); double Bx[4] = {1.0, 1.0, 1.0, 1.0}; int Bi[4] = {0, 1, 0, 1}; int Bp[4] = {0, 1, 2, 4}; @@ -199,7 +199,7 @@ const char *test_csr_csc_matmul_alloc_basic(void) * [0, 1, 1], * [1, 1, 2]] */ - CSR_Matrix *C = csr_csc_matmul_alloc(A, B); + CSR_matrix *C = csr_csc_matmul_alloc(A, B); int expected_p4[4] = {0, 2, 4, 7}; int expected_i4[7] = {0, 2, 1, 2, 0, 1, 2}; @@ -208,20 +208,20 @@ const char *test_csr_csc_matmul_alloc_basic(void) mu_assert("C row pointers incorrect", cmp_int_array(C->p, expected_p4, 4)); mu_assert("C col indices incorrect", cmp_int_array(C->i, expected_i4, 7)); - free_csr_matrix(C); - free_csr_matrix(A); - free_csc_matrix(B); + free_CSR_matrix(C); + 
free_CSR_matrix(A); + free_CSC_matrix(B); return NULL; } /* Test csr_csc_matmul_alloc with sparse result */ const char *test_csr_csc_matmul_alloc_sparse(void) { - /* A is 2x3 CSR: + /* A is 2x3 CSR_matrix: * [1.0 0.0 0.0] * [0.0 0.0 1.0] */ - CSR_Matrix *A = new_csr_matrix(2, 3, 2); + CSR_matrix *A = new_CSR_matrix(2, 3, 2); double Ax[2] = {1.0, 1.0}; int Ai[2] = {0, 2}; int Ap[3] = {0, 1, 2}; @@ -229,12 +229,12 @@ const char *test_csr_csc_matmul_alloc_sparse(void) memcpy(A->i, Ai, 2 * sizeof(int)); memcpy(A->p, Ap, 3 * sizeof(int)); - /* B is 3x2 CSC: + /* B is 3x2 CSC_matrix: * [1.0 0.0] * [0.0 0.0] * [0.0 1.0] */ - CSC_Matrix *B = new_csc_matrix(3, 2, 2); + CSC_matrix *B = new_CSC_matrix(3, 2, 2); double Bx[2] = {1.0, 1.0}; int Bi[2] = {0, 2}; int Bp[3] = {0, 1, 2}; @@ -246,7 +246,7 @@ const char *test_csr_csc_matmul_alloc_sparse(void) * C = [[1, 0], * [0, 1]] */ - CSR_Matrix *C = csr_csc_matmul_alloc(A, B); + CSR_matrix *C = csr_csc_matmul_alloc(A, B); int expected_p5[3] = {0, 1, 2}; int expected_i5[2] = {0, 1}; @@ -255,20 +255,20 @@ const char *test_csr_csc_matmul_alloc_sparse(void) mu_assert("C row pointers incorrect", cmp_int_array(C->p, expected_p5, 3)); mu_assert("C col indices incorrect", cmp_int_array(C->i, expected_i5, 2)); - free_csr_matrix(C); - free_csr_matrix(A); - free_csc_matrix(B); + free_CSR_matrix(C); + free_CSR_matrix(A); + free_CSC_matrix(B); return NULL; } /* Test block_left_multiply_vec with single block: y = A @ x */ const char *test_block_left_multiply_vec_single_block(void) { - /* A is 2x3 CSR: + /* A is 2x3 CSR_matrix: * [1.0 0.0 2.0] * [0.0 3.0 0.0] */ - CSR_Matrix *A = new_csr_matrix(2, 3, 3); + CSR_matrix *A = new_CSR_matrix(2, 3, 3); double Ax[3] = {1.0, 3.0, 2.0}; int Ai[3] = {0, 1, 2}; int Ap[3] = {0, 2, 3}; @@ -289,18 +289,18 @@ const char *test_block_left_multiply_vec_single_block(void) double expected_y[2] = {7.0, 6.0}; mu_assert("y values incorrect", cmp_double_array(y, expected_y, 2)); - free_csr_matrix(A); + 
free_CSR_matrix(A); return NULL; } /* Test block_left_multiply_vec with two blocks: y = [A @ x1; A @ x2] */ const char *test_block_left_multiply_vec_two_blocks(void) { - /* A is 2x3 CSR: + /* A is 2x3 CSR_matrix: * [1.0 2.0 0.0] * [0.0 3.0 4.0] */ - CSR_Matrix *A = new_csr_matrix(2, 3, 4); + CSR_matrix *A = new_CSR_matrix(2, 3, 4); double Ax[4] = {1.0, 2.0, 3.0, 4.0}; int Ai[4] = {0, 1, 1, 2}; int Ap[3] = {0, 2, 4}; @@ -323,19 +323,19 @@ const char *test_block_left_multiply_vec_two_blocks(void) double expected_y[4] = {5.0, 18.0, 14.0, 39.0}; mu_assert("y values incorrect", cmp_double_array(y, expected_y, 4)); - free_csr_matrix(A); + free_CSR_matrix(A); return NULL; } /* Test block_left_multiply_vec with sparse matrix and multiple blocks */ const char *test_block_left_multiply_vec_sparse(void) { - /* A is 3x4 CSR (very sparse): + /* A is 3x4 CSR_matrix (very sparse): * [2.0 0.0 0.0 0.0] * [0.0 0.0 3.0 0.0] * [0.0 0.0 0.0 4.0] */ - CSR_Matrix *A = new_csr_matrix(3, 4, 3); + CSR_matrix *A = new_CSR_matrix(3, 4, 3); double Ax[3] = {2.0, 3.0, 4.0}; int Ai[3] = {0, 2, 3}; int Ap[4] = {0, 1, 2, 3}; @@ -358,18 +358,18 @@ const char *test_block_left_multiply_vec_sparse(void) double expected_y[6] = {2.0, 9.0, 16.0, 10.0, 21.0, 32.0}; mu_assert("y values incorrect", cmp_double_array(y, expected_y, 6)); - free_csr_matrix(A); + free_CSR_matrix(A); return NULL; } /* Test block_left_multiply_vec with three blocks */ const char *test_block_left_multiply_vec_three_blocks(void) { - /* A is 2x2 CSR: + /* A is 2x2 CSR_matrix: * [1.0 2.0] * [3.0 4.0] */ - CSR_Matrix *A = new_csr_matrix(2, 2, 4); + CSR_matrix *A = new_CSR_matrix(2, 2, 4); double Ax[4] = {1.0, 2.0, 3.0, 4.0}; int Ai[4] = {0, 1, 0, 1}; int Ap[3] = {0, 2, 4}; @@ -393,6 +393,6 @@ const char *test_block_left_multiply_vec_three_blocks(void) double expected_y[6] = {5.0, 11.0, 11.0, 25.0, 17.0, 39.0}; mu_assert("y values incorrect", cmp_double_array(y, expected_y, 6)); - free_csr_matrix(A); + free_CSR_matrix(A); return NULL; } 
diff --git a/tests/utils/test_linalg_utils_matmul_chain_rule.h b/tests/utils/test_linalg_utils_matmul_chain_rule.h index 0c66589..86d7e3f 100644 --- a/tests/utils/test_linalg_utils_matmul_chain_rule.h +++ b/tests/utils/test_linalg_utils_matmul_chain_rule.h @@ -4,8 +4,8 @@ #include "minunit.h" #include "test_helpers.h" -#include "utils/CSC_Matrix.h" -#include "utils/CSR_Matrix.h" +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" #include "utils/linalg_dense_sparse_matmuls.h" /* Test YT_kron_I_alloc and YT_kron_I_fill_values @@ -17,7 +17,7 @@ * [1 3] * [2 4] * - * J (mk=4 x p=3, CSC): + * J (mk=4 x p=3, CSC_matrix): * [1 0 2] * [0 1 0] * [3 0 0] @@ -33,8 +33,8 @@ const char *test_YT_kron_I(void) { int m = 2, k = 2, n = 2; - /* J is 4x3 CSC */ - CSC_Matrix *J = new_csc_matrix(4, 3, 5); + /* J is 4x3 CSC_matrix */ + CSC_matrix *J = new_CSC_matrix(4, 3, 5); int Jp[4] = {0, 2, 3, 5}; int Ji[5] = {0, 2, 1, 0, 3}; double Jx[5] = {1.0, 3.0, 1.0, 2.0, 1.0}; @@ -45,9 +45,9 @@ const char *test_YT_kron_I(void) /* Y col-major: Y[0,0]=1, Y[1,0]=2, Y[0,1]=3, Y[1,1]=4 */ double Y[4] = {1.0, 2.0, 3.0, 4.0}; - CSR_Matrix *C = YT_kron_I_alloc(m, k, n, J); + CSR_matrix *C = YT_kron_I_alloc(m, k, n, J); - /* Expected CSR (from scipy) */ + /* Expected CSR_matrix (from scipy) */ int exp_p[5] = {0, 2, 4, 6, 8}; int exp_i[8] = {0, 2, 1, 2, 0, 2, 1, 2}; double exp_x[8] = {7.0, 2.0, 1.0, 2.0, 15.0, 6.0, 3.0, 4.0}; @@ -60,8 +60,8 @@ const char *test_YT_kron_I(void) YT_kron_I_fill_values(m, k, n, Y, J, C); mu_assert("C values", cmp_double_array(C->x, exp_x, 8)); - free_csr_matrix(C); - free_csc_matrix(J); + free_CSR_matrix(C); + free_CSC_matrix(J); return NULL; } @@ -71,7 +71,7 @@ const char *test_YT_kron_I(void) * [1.0 0.5 2.0] * [3.0 1.0 0.5] * - * J (mk=6 x p=4, CSC): + * J (mk=6 x p=4, CSC_matrix): * [1 0 0 2] * [0 0 1 0] * [0 3 0 0] @@ -85,8 +85,8 @@ const char *test_YT_kron_I_larger(void) { int m = 3, k = 2, n = 3; - /* J is 6x4 CSC */ - CSC_Matrix *J = new_csc_matrix(6, 4, 
8); + /* J is 6x4 CSC_matrix */ + CSC_matrix *J = new_CSC_matrix(6, 4, 8); int Jp[5] = {0, 2, 4, 6, 8}; int Ji[8] = {0, 3, 2, 4, 1, 5, 0, 3}; double Jx[8] = {1.0, 2.0, 3.0, 1.0, 1.0, 4.0, 2.0, 1.0}; @@ -97,9 +97,9 @@ const char *test_YT_kron_I_larger(void) /* Y col-major */ double Y[6] = {1.0, 3.0, 0.5, 1.0, 2.0, 0.5}; - CSR_Matrix *C = YT_kron_I_alloc(m, k, n, J); + CSR_matrix *C = YT_kron_I_alloc(m, k, n, J); - /* Expected CSR (from scipy) */ + /* Expected CSR_matrix (from scipy) */ int exp_p[10] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18}; int exp_i[18] = {0, 3, 1, 2, 1, 2, 0, 3, 1, 2, 1, 2, 0, 3, 1, 2, 1, 2}; double exp_x[18] = {7.0, 5.0, 3.0, 1.0, 3.0, 12.0, 2.5, 2.0, 1.0, @@ -113,8 +113,8 @@ const char *test_YT_kron_I_larger(void) YT_kron_I_fill_values(m, k, n, Y, J, C); mu_assert("C2 values", cmp_double_array(C->x, exp_x, 18)); - free_csr_matrix(C); - free_csc_matrix(J); + free_CSR_matrix(C); + free_CSC_matrix(J); return NULL; } @@ -127,7 +127,7 @@ const char *test_YT_kron_I_larger(void) * [1 3] * [2 4] * - * J (kn=4 x p=3, CSC): + * J (kn=4 x p=3, CSC_matrix): * [1 0 2] * [0 1 0] * [3 0 0] @@ -143,8 +143,8 @@ const char *test_I_kron_X(void) { int m = 2, k = 2, n = 2; - /* J is 4x3 CSC */ - CSC_Matrix *J = new_csc_matrix(4, 3, 5); + /* J is 4x3 CSC_matrix */ + CSC_matrix *J = new_CSC_matrix(4, 3, 5); int Jp[4] = {0, 2, 3, 5}; int Ji[5] = {0, 2, 1, 0, 3}; double Jx[5] = {1.0, 3.0, 1.0, 2.0, 1.0}; @@ -155,9 +155,9 @@ const char *test_I_kron_X(void) /* X col-major */ double X[4] = {1.0, 2.0, 3.0, 4.0}; - CSR_Matrix *C = I_kron_X_alloc(m, k, n, J); + CSR_matrix *C = I_kron_X_alloc(m, k, n, J); - /* Expected CSR */ + /* Expected CSR_matrix */ int exp_p[5] = {0, 3, 6, 8, 10}; int exp_i[10] = {0, 1, 2, 0, 1, 2, 0, 2, 0, 2}; double exp_x[10] = {1.0, 3.0, 2.0, 2.0, 4.0, 4.0, 3.0, 3.0, 6.0, 4.0}; @@ -170,8 +170,8 @@ const char *test_I_kron_X(void) I_kron_X_fill_values(m, k, n, X, J, C); mu_assert("C values", cmp_double_array(C->x, exp_x, 10)); - free_csr_matrix(C); - 
free_csc_matrix(J); + free_CSR_matrix(C); + free_CSC_matrix(J); return NULL; } @@ -182,7 +182,7 @@ const char *test_I_kron_X(void) * [2.0 1.0] * [3.0 0.5] * - * J (kn=4 x p=4, CSC): + * J (kn=4 x p=4, CSC_matrix): * [1 0 0 2] * [0 3 1 0] * [0 0 4 0] @@ -194,8 +194,8 @@ const char *test_I_kron_X_larger(void) { int m = 3, k = 2, n = 2; - /* J is 4x4 CSC */ - CSC_Matrix *J = new_csc_matrix(4, 4, 7); + /* J is 4x4 CSC_matrix */ + CSC_matrix *J = new_CSC_matrix(4, 4, 7); int Jp[5] = {0, 2, 3, 5, 7}; int Ji[7] = {0, 3, 1, 1, 2, 0, 3}; double Jx[7] = {1.0, 2.0, 3.0, 1.0, 4.0, 2.0, 1.0}; @@ -206,9 +206,9 @@ const char *test_I_kron_X_larger(void) /* X col-major */ double X[6] = {1.0, 2.0, 3.0, 0.5, 1.0, 0.5}; - CSR_Matrix *C = I_kron_X_alloc(m, k, n, J); + CSR_matrix *C = I_kron_X_alloc(m, k, n, J); - /* Expected CSR */ + /* Expected CSR_matrix */ int exp_p[7] = {0, 4, 8, 12, 15, 18, 21}; int exp_i[21] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 2, 3, 0, 2, 3, 0, 2, 3}; double exp_x[21] = {1.0, 1.5, 0.5, 2.0, 2.0, 3.0, 1.0, 4.0, 3.0, 1.5, 0.5, @@ -222,7 +222,7 @@ const char *test_I_kron_X_larger(void) I_kron_X_fill_values(m, k, n, X, J, C); mu_assert("C2 values", cmp_double_array(C->x, exp_x, 21)); - free_csr_matrix(C); - free_csc_matrix(J); + free_CSR_matrix(C); + free_CSC_matrix(J); return NULL; } diff --git a/tests/utils/test_matrix.h b/tests/utils/test_matrix.h index c329a16..d14dfbc 100644 --- a/tests/utils/test_matrix.h +++ b/tests/utils/test_matrix.h @@ -3,17 +3,18 @@ #include "minunit.h" #include "test_helpers.h" -#include "utils/dense_matrix.h" +#include "utils/permuted_dense.h" +#include "utils/sparse_matrix.h" #include #include -/* Test dense block_left_mult_vec against known result. +/* Test full-block PD block_left_mult_vec against known result. 
A = [1 2; 3 4] (2x2), x = [1; 2], p = 1 y = A * x = [1*1+2*2; 3*1+4*2] = [5; 11] */ -const char *test_dense_matrix_mult_vec(void) +const char *test_pd_mult_vec_basic(void) { double data[] = {1.0, 2.0, 3.0, 4.0}; - Matrix *A = new_dense_matrix(2, 2, data); + matrix *A = new_permuted_dense_full(2, 2, data); double x[] = {1.0, 2.0}; double y[2] = {0.0, 0.0}; @@ -27,13 +28,13 @@ const char *test_dense_matrix_mult_vec(void) return 0; } -/* Test dense block_left_mult_vec with multiple blocks. +/* Test full-block PD block_left_mult_vec with multiple blocks. A = [1 2; 3 4] (2x2), x = [1; 2; 3; 4], p = 2 y = [A*[1;2]; A*[3;4]] = [5; 11; 11; 25] */ -const char *test_dense_matrix_mult_vec_blocks(void) +const char *test_pd_mult_vec_blocks(void) { double data[] = {1.0, 2.0, 3.0, 4.0}; - Matrix *A = new_dense_matrix(2, 2, data); + matrix *A = new_permuted_dense_full(2, 2, data); double x[] = {1.0, 2.0, 3.0, 4.0}; double y[4] = {0}; @@ -47,12 +48,12 @@ const char *test_dense_matrix_mult_vec_blocks(void) return 0; } -/* Compare sparse vs dense block_left_mult_vec for a non-square matrix. +/* Compare sparse vs PD block_left_mult_vec for a non-square matrix. 
A = [1 2 3; 4 5 6] (2x3), x = [1; 2; 3], p = 1 */ -const char *test_sparse_vs_dense_mult_vec(void) +const char *test_sparse_vs_pd_mult_vec(void) { - /* Build CSR for A = [1 2 3; 4 5 6] */ - CSR_Matrix *csr = new_csr_matrix(2, 3, 6); + /* Build CSR_matrix for A = [1 2 3; 4 5 6] */ + CSR_matrix *csr = new_CSR_matrix(2, 3, 6); int Ap[3] = {0, 3, 6}; int Ai[6] = {0, 1, 2, 0, 1, 2}; double Ax[6] = {1, 2, 3, 4, 5, 6}; @@ -60,51 +61,50 @@ const char *test_sparse_vs_dense_mult_vec(void) memcpy(csr->i, Ai, 6 * sizeof(int)); memcpy(csr->x, Ax, 6 * sizeof(double)); - double dense_data[] = {1, 2, 3, 4, 5, 6}; + double pd_data[] = {1, 2, 3, 4, 5, 6}; - Matrix *sparse = new_sparse_matrix(csr); - Matrix *dense = new_dense_matrix(2, 3, dense_data); + matrix *sparse = new_sparse_matrix(csr); + matrix *pd = new_permuted_dense_full(2, 3, pd_data); double x[] = {1.0, 2.0, 3.0}; double y_sparse[2] = {0}; - double y_dense[2] = {0}; + double y_pd[2] = {0}; sparse->block_left_mult_vec(sparse, x, y_sparse, 1); - dense->block_left_mult_vec(dense, x, y_dense, 1); + pd->block_left_mult_vec(pd, x, y_pd, 1); - mu_assert("sparse vs dense mismatch", cmp_double_array(y_sparse, y_dense, 2)); + mu_assert("sparse vs pd mismatch", cmp_double_array(y_sparse, y_pd, 2)); free_matrix(sparse); - free_matrix(dense); - free_csr_matrix(csr); + free_matrix(pd); return 0; } -/* Test dense transpose */ -const char *test_dense_matrix_trans(void) +/* Test full-block PD transpose via vtable. 
*/ +const char *test_pd_trans_full_block(void) { double data[] = {1, 2, 3, 4, 5, 6}; /* 2x3 */ - Matrix *A = new_dense_matrix(2, 3, data); - Matrix *AT = dense_matrix_trans((const Dense_Matrix *) A); + matrix *A = new_permuted_dense_full(2, 3, data); + matrix *AT = A->transpose_alloc(A); + A->transpose_fill_values(A, AT); mu_assert("transpose m", AT->m == 3); mu_assert("transpose n", AT->n == 2); /* AT should be [1 4; 2 5; 3 6] stored row-major */ - Dense_Matrix *dm = (Dense_Matrix *) AT; double AT_expected[6] = {1.0, 4.0, 2.0, 5.0, 3.0, 6.0}; - mu_assert("AT vals incorrect", cmp_double_array(dm->x, AT_expected, 6)); + mu_assert("AT vals incorrect", cmp_double_array(AT->x, AT_expected, 6)); free_matrix(A); free_matrix(AT); return 0; } -/* Compare sparse vs dense block_left_mult_vec with p=2 blocks. +/* Compare sparse vs PD block_left_mult_vec with p=2 blocks. A = [1 2; 3 4], x = [1; 2; 3; 4], p = 2 */ -const char *test_sparse_vs_dense_mult_vec_blocks(void) +const char *test_sparse_vs_pd_mult_vec_blocks(void) { - CSR_Matrix *csr = new_csr_matrix(2, 2, 4); + CSR_matrix *csr = new_CSR_matrix(2, 2, 4); int Ap[3] = {0, 2, 4}; int Ai[4] = {0, 1, 0, 1}; double Ax[4] = {1, 2, 3, 4}; @@ -112,24 +112,43 @@ const char *test_sparse_vs_dense_mult_vec_blocks(void) memcpy(csr->i, Ai, 4 * sizeof(int)); memcpy(csr->x, Ax, 4 * sizeof(double)); - double dense_data[] = {1, 2, 3, 4}; + double pd_data[] = {1, 2, 3, 4}; - Matrix *sparse = new_sparse_matrix(csr); - Matrix *dense = new_dense_matrix(2, 2, dense_data); + matrix *sparse = new_sparse_matrix(csr); + matrix *pd = new_permuted_dense_full(2, 2, pd_data); double x[] = {1.0, 2.0, 3.0, 4.0}; double y_sparse[4] = {0}; - double y_dense[4] = {0}; + double y_pd[4] = {0}; sparse->block_left_mult_vec(sparse, x, y_sparse, 2); - dense->block_left_mult_vec(dense, x, y_dense, 2); + pd->block_left_mult_vec(pd, x, y_pd, 2); - mu_assert("sparse vs dense blocks mismatch", - cmp_double_array(y_sparse, y_dense, 4)); + mu_assert("sparse vs pd blocks 
mismatch", cmp_double_array(y_sparse, y_pd, 4)); free_matrix(sparse); - free_matrix(dense); - free_csr_matrix(csr); + free_matrix(pd); + return 0; +} + +/* Full-block permuted_dense acting as operator: smoke test mirroring + test_pd_mult_vec_basic with an explicit row_perm/col_perm. */ +const char *test_pd_operator_block_left_mult_vec(void) +{ + double data[] = {1.0, 2.0, 3.0, 4.0}; + int row_perm[2] = {0, 1}; + int col_perm[2] = {0, 1}; + matrix *A = new_permuted_dense(2, 2, 2, 2, row_perm, col_perm, data); + + double x[] = {1.0, 2.0}; + double y[2] = {0.0, 0.0}; + + A->block_left_mult_vec(A, x, y, 1); + + double y_expected[2] = {5.0, 11.0}; + mu_assert("y incorrect", cmp_double_array(y, y_expected, 2)); + + free_matrix(A); return 0; } diff --git a/tests/utils/test_matrix_BTA.h b/tests/utils/test_matrix_BTA.h new file mode 100644 index 0000000..feb5fe9 --- /dev/null +++ b/tests/utils/test_matrix_BTA.h @@ -0,0 +1,173 @@ +#ifndef TEST_MATRIX_BTA_H +#define TEST_MATRIX_BTA_H + +#include "minunit.h" +#include "old-code/old_permuted_dense.h" +#include "test_helpers.h" +#include "utils/CSC_matrix.h" +#include "utils/CSR_matrix.h" +#include "utils/matrix_BTA.h" +#include "utils/permuted_dense.h" +#include "utils/sparse_matrix.h" +#include "utils/utils.h" +#include +#include + +/* Wrapper dispatch sanity: (PD, PD). Compare against direct + BTDA_pd_pd_fill_values. */ +const char *test_BTDA_matrices_pd_pd(void) +{ + int row_perm[2] = {0, 1}; + int col_perm_A[2] = {0, 2}; + int col_perm_B[2] = {1, 3}; + double XA[4] = {1.0, 2.0, 3.0, 4.0}; + double XB[4] = {5.0, 6.0, 7.0, 8.0}; + double d[2] = {2.0, -1.5}; + + matrix *A_m = new_permuted_dense(2, 4, 2, 2, row_perm, col_perm_A, XA); + matrix *B_m = new_permuted_dense(2, 4, 2, 2, row_perm, col_perm_B, XB); + + /* Wrapper path. */ + matrix *C_m = BTA_matrices_alloc(A_m, B_m); + BTDA_matrices_fill_values(A_m, d, B_m, C_m); + + /* Direct primitive path on independent operands. 
*/ + matrix *A2 = new_permuted_dense(2, 4, 2, 2, row_perm, col_perm_A, XA); + matrix *B2 = new_permuted_dense(2, 4, 2, 2, row_perm, col_perm_B, XB); + matrix *C2 = BTA_pd_pd_alloc((permuted_dense *) B2, (permuted_dense *) A2); + BTDA_pd_pd_fill_values((permuted_dense *) B2, d, (permuted_dense *) A2, + (permuted_dense *) C2); + + mu_assert("values", cmp_double_array(C_m->x, C2->x, C_m->nnz)); + + free_matrix(C_m); + free_matrix(B_m); + free_matrix(A_m); + free_matrix(C2); + free_matrix(B2); + free_matrix(A2); + return 0; +} + +/* Wrapper dispatch sanity: (CSR_matrix, PD). Compare against direct + BTDA_pd_csr_fill_values. */ +const char *test_BTDA_matrices_csr_pd(void) +{ + /* A: 4x5 CSR_matrix */ + CSR_matrix *A = new_CSR_matrix(4, 5, 5); + A->p[0] = 0; + A->p[1] = 2; + A->p[2] = 3; + A->p[3] = 4; + A->p[4] = 5; + int Ai[5] = {0, 3, 2, 1, 4}; + double Ax[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; + memcpy(A->i, Ai, sizeof Ai); + memcpy(A->x, Ax, sizeof Ax); + matrix *A_m = new_sparse_matrix(A); + + /* B: 4x4 PD, row_perm = [1, 3], col_perm = [0, 2]. */ + int row_perm_B[2] = {1, 3}; + int col_perm_B[2] = {0, 2}; + double XB[4] = {10.0, 20.0, 30.0, 40.0}; + matrix *B_m = new_permuted_dense(4, 4, 2, 2, row_perm_B, col_perm_B, XB); + + double d[4] = {1.0, -2.0, 0.5, 3.0}; + + /* Wrapper path. Dispatchers don't touch sparse_matrix internals — caller + owns csc_cache structure and values. */ + sparse_matrix_ensure_csc_cache((sparse_matrix *) A_m); + matrix *C_m = BTA_matrices_alloc(A_m, B_m); + A_m->refresh_csc_values(A_m); + BTDA_matrices_fill_values(A_m, d, B_m, C_m); + + /* Direct primitive path. 
*/ + CSR_matrix *A2 = new_CSR_matrix(4, 5, 5); + A2->p[0] = 0; + A2->p[1] = 2; + A2->p[2] = 3; + A2->p[3] = 4; + A2->p[4] = 5; + memcpy(A2->i, Ai, sizeof Ai); + memcpy(A2->x, Ax, sizeof Ax); + matrix *B2_m = new_permuted_dense(4, 4, 2, 2, row_perm_B, col_perm_B, XB); + permuted_dense *B2 = (permuted_dense *) B2_m; + matrix *C2 = BTA_pd_csr_alloc(B2, A2); + BTDA_pd_csr_fill_values(B2, d, A2, (permuted_dense *) C2); + + mu_assert("values", cmp_double_array(C_m->x, C2->x, C_m->nnz)); + + free_matrix(C_m); + free_matrix(B_m); + free_matrix(A_m); + free_matrix(C2); + free_matrix(B2_m); + free_CSR_matrix(A2); + return 0; +} + +/* Wrapper dispatch sanity: (PD, CSR_matrix). Compare against direct + BTDA_csc_pd_fill_values. */ +const char *test_BTDA_matrices_pd_csr(void) +{ + /* A: 4x5 PD, row_perm = [1, 3], col_perm = [0, 2]. */ + int row_perm_A[2] = {1, 3}; + int col_perm_A[2] = {0, 2}; + double XA[4] = {1.0, 2.0, 3.0, 4.0}; + matrix *A_m = new_permuted_dense(4, 5, 2, 2, row_perm_A, col_perm_A, XA); + + /* B: 4x4 CSR_matrix. */ + CSR_matrix *B = new_CSR_matrix(4, 4, 5); + B->p[0] = 0; + B->p[1] = 2; + B->p[2] = 3; + B->p[3] = 4; + B->p[4] = 5; + int Bi[5] = {0, 2, 1, 0, 3}; + double Bx[5] = {10.0, 20.0, 30.0, 40.0, 50.0}; + memcpy(B->i, Bi, sizeof Bi); + memcpy(B->x, Bx, sizeof Bx); + matrix *B_m = new_sparse_matrix(B); + + double d[4] = {1.0, -2.0, 0.5, 3.0}; + + /* Wrapper path. Dispatchers don't touch sparse_matrix internals — caller + owns csc_cache structure and values. */ + sparse_matrix_ensure_csc_cache((sparse_matrix *) B_m); + matrix *C_m = BTA_matrices_alloc(A_m, B_m); + B_m->refresh_csc_values(B_m); + BTDA_matrices_fill_values(A_m, d, B_m, C_m); + + /* Direct primitive path: production now dispatches the (PD, Sparse) + branch through CSC-pd kernels. Build a CSC view of B and call + BTA_csc_pd_alloc + BTDA_csc_pd_fill_values to match. 
*/ + matrix *A2_m = new_permuted_dense(4, 5, 2, 2, row_perm_A, col_perm_A, XA); + permuted_dense *A2 = (permuted_dense *) A2_m; + CSR_matrix *B2_csr = new_CSR_matrix(4, 4, 5); + B2_csr->p[0] = 0; + B2_csr->p[1] = 2; + B2_csr->p[2] = 3; + B2_csr->p[3] = 4; + B2_csr->p[4] = 5; + memcpy(B2_csr->i, Bi, sizeof Bi); + memcpy(B2_csr->x, Bx, sizeof Bx); + int *iwork = (int *) malloc(MAX(B2_csr->m, B2_csr->n) * sizeof(int)); + CSC_matrix *B2_csc = csr_to_csc_alloc(B2_csr, iwork); + csr_to_csc_fill_values(B2_csr, B2_csc, iwork); + matrix *C2 = BTA_csc_pd_alloc(B2_csc, A2); + BTDA_csc_pd_fill_values(B2_csc, d, A2, (permuted_dense *) C2); + + mu_assert("values", cmp_double_array(C_m->x, C2->x, C_m->nnz)); + + free_matrix(C_m); + free_matrix(B_m); + free_matrix(A_m); + free_matrix(C2); + free_CSC_matrix(B2_csc); + free_CSR_matrix(B2_csr); + free(iwork); + free_matrix(A2_m); + return 0; +} + +#endif /* TEST_MATRIX_BTA_H */ diff --git a/tests/utils/test_permuted_dense.h b/tests/utils/test_permuted_dense.h new file mode 100644 index 0000000..b5baca4 --- /dev/null +++ b/tests/utils/test_permuted_dense.h @@ -0,0 +1,1004 @@ +#ifndef TEST_PERMUTED_DENSE_H +#define TEST_PERMUTED_DENSE_H + +#include "minunit.h" +#include "old-code/old_permuted_dense.h" +#include "test_helpers.h" +#include "utils/CSC_matrix.h" +#include "utils/matrix_BTA.h" +#include "utils/permuted_dense.h" +#include "utils/sparse_matrix.h" +#include "utils/utils.h" +#include +#include + +/* 5x6 matrix with a 3x2 dense block at rows {1, 2, 4}, cols {0, 3}: + + global view: + [0 0 0 0 0 0] + [1 0 0 2 0 0] + [3 0 0 4 0 0] + [0 0 0 0 0 0] + [5 0 0 6 0 0] */ +const char *test_permuted_dense_to_csr_basic(void) +{ + int row_perm[3] = {1, 2, 4}; + int col_perm[2] = {0, 3}; + double X[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + + matrix *M = new_permuted_dense(5, 6, 3, 2, row_perm, col_perm, X); + + CSR_matrix *C = M->to_csr(M); + int Cp_expected[6] = {0, 0, 2, 4, 4, 6}; + int Ci_expected[6] = {0, 3, 0, 3, 0, 3}; + double 
Cx_expected[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + + mu_assert("dim m", C->m == 5); + mu_assert("dim n", C->n == 6); + mu_assert("nnz", C->nnz == 6); + mu_assert("p", cmp_int_array(C->p, Cp_expected, 6)); + mu_assert("i", cmp_int_array(C->i, Ci_expected, 6)); + mu_assert("x", cmp_double_array(C->x, Cx_expected, 6)); + + free_matrix(M); + return 0; +} + +/* Empty dense block (m0 = n0 = 0): result is an m x n CSR_matrix with + no nonzeros. */ +const char *test_permuted_dense_to_csr_empty(void) +{ + matrix *M = new_permuted_dense(4, 5, 0, 0, NULL, NULL, NULL); + + CSR_matrix *C = M->to_csr(M); + int Cp_expected[5] = {0, 0, 0, 0, 0}; + mu_assert("nnz", C->nnz == 0); + mu_assert("p", cmp_int_array(C->p, Cp_expected, 5)); + + free_matrix(M); + return 0; +} + +/* Full dense (row_perm = [0..m), col_perm = [0..n)): result is the dense + matrix in CSR_matrix. */ +const char *test_permuted_dense_to_csr_full(void) +{ + int row_perm[2] = {0, 1}; + int col_perm[3] = {0, 1, 2}; + double X[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + + matrix *M = new_permuted_dense(2, 3, 2, 3, row_perm, col_perm, X); + + CSR_matrix *C = M->to_csr(M); + int Cp_expected[3] = {0, 3, 6}; + int Ci_expected[6] = {0, 1, 2, 0, 1, 2}; + double Cx_expected[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + + mu_assert("p", cmp_int_array(C->p, Cp_expected, 3)); + mu_assert("i", cmp_int_array(C->i, Ci_expected, 6)); + mu_assert("x", cmp_double_array(C->x, Cx_expected, 6)); + + free_matrix(M); + return 0; +} + +/* Single dense row, two dense cols. Tests rows with no entries before + and after the active row. 
*/ +const char *test_permuted_dense_to_csr_single_row(void) +{ + int row_perm[1] = {2}; + int col_perm[2] = {1, 4}; + double X[2] = {7.0, 9.0}; + + matrix *M = new_permuted_dense(4, 5, 1, 2, row_perm, col_perm, X); + + CSR_matrix *C = M->to_csr(M); + int Cp_expected[5] = {0, 0, 0, 2, 2}; + int Ci_expected[2] = {1, 4}; + double Cx_expected[2] = {7.0, 9.0}; + + mu_assert("p", cmp_int_array(C->p, Cp_expected, 5)); + mu_assert("i", cmp_int_array(C->i, Ci_expected, 2)); + mu_assert("x", cmp_double_array(C->x, Cx_expected, 2)); + + free_matrix(M); + return 0; +} + +/* Single dense col across multiple rows. */ +const char *test_permuted_dense_to_csr_single_col(void) +{ + int row_perm[3] = {0, 2, 3}; + int col_perm[1] = {2}; + double X[3] = {1.0, 2.0, 3.0}; + + matrix *M = new_permuted_dense(4, 4, 3, 1, row_perm, col_perm, X); + + CSR_matrix *C = M->to_csr(M); + int Cp_expected[5] = {0, 1, 1, 2, 3}; + int Ci_expected[3] = {2, 2, 2}; + double Cx_expected[3] = {1.0, 2.0, 3.0}; + + mu_assert("p", cmp_int_array(C->p, Cp_expected, 5)); + mu_assert("i", cmp_int_array(C->i, Ci_expected, 3)); + mu_assert("x", cmp_double_array(C->x, Cx_expected, 3)); + + free_matrix(M); + return 0; +} + +/* DA_fill_values: compare against CSR_matrix DA_fill_values on the equivalent + CSR_matrix. + + PD is the 5x6 matrix from the basic to_csr test, with d a length-5 + global-row diagonal including a negative and zero entry. */ +const char *test_DA_pd_fill_values(void) +{ + int row_perm[3] = {1, 2, 4}; + int col_perm[2] = {0, 3}; + double X[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + double d[5] = {7.0, -1.5, 0.0, 9.0, 2.5}; + + matrix *M = new_permuted_dense(5, 6, 3, 2, row_perm, col_perm, X); + matrix *M_out = new_permuted_dense(5, 6, 3, 2, row_perm, col_perm, NULL); + permuted_dense *pd = (permuted_dense *) M; + permuted_dense *pd_out = (permuted_dense *) M_out; + + DA_pd_fill_values(d, pd, pd_out); + + /* Ground truth: build CSR_matrix of self, run DA_fill_values, compare. 
*/ + CSR_matrix *csr = M->to_csr(M); + CSR_matrix *csr_expected = new_csr_copy_sparsity(csr); + DA_fill_values(d, csr, csr_expected); + + CSR_matrix *csr_out = M_out->to_csr(M_out); + mu_assert("x", cmp_double_array(csr_out->x, csr_expected->x, csr->nnz)); + + free_CSR_matrix(csr_expected); + free_matrix(M); + free_matrix(M_out); + return 0; +} + +/* ATA_alloc: structure-only check. Output is 6x6 with a 2x2 dense block at + perms {0, 3} (= self.col_perm on both sides). Values are uninitialized + here; ATDA_fill_values is the value-producing op. */ +const char *test_ATA_pd_alloc(void) +{ + int row_perm[3] = {1, 2, 4}; + int col_perm[2] = {0, 3}; + double X[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + + matrix *M = new_permuted_dense(5, 6, 3, 2, row_perm, col_perm, X); + permuted_dense *pd = (permuted_dense *) M; + + matrix *M_ata = ATA_pd_alloc(pd); + permuted_dense *pd_ata = (permuted_dense *) M_ata; + + int perm_expected[2] = {0, 3}; + mu_assert("m", M_ata->m == 6); + mu_assert("n", M_ata->n == 6); + mu_assert("m0", pd_ata->m0 == 2); + mu_assert("n0", pd_ata->n0 == 2); + mu_assert("row_perm", cmp_int_array(pd_ata->row_perm, perm_expected, 2)); + mu_assert("col_perm", cmp_int_array(pd_ata->col_perm, perm_expected, 2)); + + free_matrix(M); + free_matrix(M_ata); + return 0; +} + +/* ATDA: same 5x6 PD, d with negative + zero entries to catch sign bugs. + Hand-computed: d_perm = [-1.5, 0, 2.5], Y = diag(d_perm) X gives + [[-1.5,-3],[0,0],[12.5,15]], and X^T Y = [[61,72],[72,84]]. 
*/ +const char *test_ATDA_pd_fill_values(void) +{ + int row_perm[3] = {1, 2, 4}; + int col_perm[2] = {0, 3}; + double X[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + double d[5] = {7.0, -1.5, 0.0, 9.0, 2.5}; + + matrix *M = new_permuted_dense(5, 6, 3, 2, row_perm, col_perm, X); + permuted_dense *pd = (permuted_dense *) M; + + matrix *M_out = ATA_pd_alloc(pd); + permuted_dense *pd_out = (permuted_dense *) M_out; + ATDA_pd_fill_values(pd, d, pd_out); + + double X_expected[4] = {61.0, 72.0, 72.0, 84.0}; + mu_assert("X", cmp_double_array(pd_out->X, X_expected, 4)); + + free_matrix(M); + free_matrix(M_out); + return 0; +} + +/* PD x CSC_matrix: J is 6x4. col 0 empty; col 1 has rows {0,3} (vals 10, 20); + col 2 has row {2} (val 30, but row 2 not in col_perm_self = {0,3} so col 2 + is INACTIVE); col 3 has row {3} (val 40). Active cols: {1, 3}. + + Expected: m0=3, n0=2, row_perm={1,2,4}, col_perm={1,3}. + Values: out.X[:,0] = 10*[1,3,5] + 20*[2,4,6] = [50,110,170], + out.X[:,1] = 40*[2,4,6] = [80,160,240]. 
*/ +const char *test_permuted_dense_times_csc(void) +{ + int row_perm[3] = {1, 2, 4}; + int col_perm[2] = {0, 3}; + double X[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + matrix *M = new_permuted_dense(5, 6, 3, 2, row_perm, col_perm, X); + permuted_dense *pd = (permuted_dense *) M; + + CSC_matrix *J = new_CSC_matrix(6, 4, 4); + int Jp[5] = {0, 0, 2, 3, 4}; + int Ji[4] = {0, 3, 2, 3}; + double Jx[4] = {10.0, 20.0, 30.0, 40.0}; + memcpy(J->p, Jp, 5 * sizeof(int)); + memcpy(J->i, Ji, 4 * sizeof(int)); + memcpy(J->x, Jx, 4 * sizeof(double)); + + matrix *M_out = BA_pd_csc_alloc(pd, J); + permuted_dense *pd_out = (permuted_dense *) M_out; + BA_pd_csc_fill_values(pd->X, pd->n0, pd->col_inv, J, pd_out); + + int row_perm_expected[3] = {1, 2, 4}; + int col_perm_expected[2] = {1, 3}; + double X_expected[6] = {50.0, 80.0, 110.0, 160.0, 170.0, 240.0}; + + mu_assert("m", M_out->m == 5); + mu_assert("n", M_out->n == 4); + mu_assert("m0", pd_out->m0 == 3); + mu_assert("n0", pd_out->n0 == 2); + mu_assert("row_perm", cmp_int_array(pd_out->row_perm, row_perm_expected, 3)); + mu_assert("col_perm", cmp_int_array(pd_out->col_perm, col_perm_expected, 2)); + mu_assert("X", cmp_double_array(pd_out->X, X_expected, 6)); + + free_matrix(M); + free_matrix(M_out); + free_CSC_matrix(J); + return 0; +} + +/* PD x CSC_matrix edge case: every column of J has its only nonzero outside + col_perm_self, so col_perm_out is empty (n0 = 0). */ +const char *test_permuted_dense_times_csc_no_active(void) +{ + int row_perm[3] = {1, 2, 4}; + int col_perm[2] = {0, 3}; + double X[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + matrix *M = new_permuted_dense(5, 6, 3, 2, row_perm, col_perm, X); + permuted_dense *pd = (permuted_dense *) M; + + /* J: col 0 has row {1}, col 1 has row {5}. Neither in col_perm_self. 
*/ + CSC_matrix *J = new_CSC_matrix(6, 2, 2); + int Jp[3] = {0, 1, 2}; + int Ji[2] = {1, 5}; + double Jx[2] = {100.0, 200.0}; + memcpy(J->p, Jp, 3 * sizeof(int)); + memcpy(J->i, Ji, 2 * sizeof(int)); + memcpy(J->x, Jx, 2 * sizeof(double)); + + matrix *M_out = BA_pd_csc_alloc(pd, J); + permuted_dense *pd_out = (permuted_dense *) M_out; + BA_pd_csc_fill_values(pd->X, pd->n0, pd->col_inv, J, pd_out); + + mu_assert("m", M_out->m == 5); + mu_assert("n", M_out->n == 2); + mu_assert("m0", pd_out->m0 == 3); + mu_assert("n0", pd_out->n0 == 0); + + free_matrix(M); + free_matrix(M_out); + free_CSC_matrix(J); + return 0; +} + +/* to_csr vtable method: lazy CSR_matrix view. First call allocates pd->csr_cache; + subsequent calls refresh values to reflect the current pd->X. */ +const char *test_permuted_dense_to_csr_lazy(void) +{ + int row_perm[3] = {1, 2, 4}; + int col_perm[2] = {0, 3}; + double X[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + + matrix *M = new_permuted_dense(5, 6, 3, 2, row_perm, col_perm, X); + permuted_dense *pd = (permuted_dense *) M; + + mu_assert("csr_cache initially NULL", pd->csr_cache == NULL); + + CSR_matrix *csr = M->to_csr(M); + mu_assert("csr_cache populated", pd->csr_cache != NULL); + mu_assert("returns the cache", csr == pd->csr_cache); + + double expected[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + mu_assert("values match X", cmp_double_array(csr->x, expected, 6)); + + /* Mutate X and re-call to_csr: values must reflect the change. */ + pd->X[0] = 99.0; + csr = M->to_csr(M); + mu_assert("refresh picks up new value", csr->x[0] == 99.0); + + free_matrix(M); + return 0; +} + +/* Sanity check: col_inv is built correctly. col_perm = {0, 3} on n = 6 + should give col_inv = {0, -1, -1, 1, -1, -1}. 
*/ +const char *test_permuted_dense_col_inv(void) +{ + int row_perm[1] = {0}; + int col_perm[2] = {0, 3}; + double X[2] = {0.0, 0.0}; + + matrix *M = new_permuted_dense(1, 6, 1, 2, row_perm, col_perm, X); + permuted_dense *pd = (permuted_dense *) M; + + int expected[6] = {0, -1, -1, 1, -1, -1}; + mu_assert("col_inv", cmp_int_array(pd->col_inv, expected, 6)); + + free_matrix(M); + return 0; +} + +/* PD index_alloc / index_fill_values: select rows from a PD; output must be + another PD with row_perm equal to the output positions where indices[i] + hit the source row_perm. */ +const char *test_permuted_dense_index(void) +{ + /* Source PD, shape (6, 4), dense block at rows {1, 3, 4} x cols {0, 2}. */ + int row_perm[3] = {1, 3, 4}; + int col_perm[2] = {0, 2}; + double X[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + matrix *M = new_permuted_dense(6, 4, 3, 2, row_perm, col_perm, X); + + /* Index by [0, 3, 1, 5, 4]: + - position 0 -> source row 0 (not in row_perm, zero) + - position 1 -> source row 3 (in row_perm at ii=1, dense) + - position 2 -> source row 1 (in row_perm at ii=0, dense) + - position 3 -> source row 5 (not in row_perm, zero) + - position 4 -> source row 4 (in row_perm at ii=2, dense) */ + int indices[5] = {0, 3, 1, 5, 4}; + matrix *out = M->index_alloc(M, indices, 5); + permuted_dense *out_pd = (permuted_dense *) out; + + mu_assert("out m", out->m == 5); + mu_assert("out n", out->n == 4); + mu_assert("out nnz", out->nnz == 6); /* m0=3 * n0=2 */ + mu_assert("m0", out_pd->m0 == 3); + mu_assert("n0", out_pd->n0 == 2); + + int expected_row_perm[3] = {1, 2, 4}; + mu_assert("row_perm", cmp_int_array(out_pd->row_perm, expected_row_perm, 3)); + int expected_col_perm[2] = {0, 2}; + mu_assert("col_perm", cmp_int_array(out_pd->col_perm, expected_col_perm, 2)); + + M->index_fill_values(M, indices, 5, out); + + /* Row 0 of out (i=1) = source row 3 = X[1, :] = {3, 4}. + Row 1 of out (i=2) = source row 1 = X[0, :] = {1, 2}. 
+ Row 2 of out (i=4) = source row 4 = X[2, :] = {5, 6}. */ + double expected_X[6] = {3.0, 4.0, 1.0, 2.0, 5.0, 6.0}; + mu_assert("values", cmp_double_array(out_pd->X, expected_X, 6)); + + free_matrix(out); + free_matrix(M); + return 0; +} + +/* PD promote_alloc / promote_fill_values: tile a 1-row PD into a + `size`-row PD where every row is a copy of the source row. */ +const char *test_permuted_dense_promote(void) +{ + /* Source PD, shape (1, 5), single dense row at row 0, cols {1, 3}. */ + int row_perm[1] = {0}; + int col_perm[2] = {1, 3}; + double X[2] = {7.0, 9.0}; + matrix *M = new_permuted_dense(1, 5, 1, 2, row_perm, col_perm, X); + + matrix *out = M->promote_alloc(M, 4); + permuted_dense *out_pd = (permuted_dense *) out; + + mu_assert("out m", out->m == 4); + mu_assert("out n", out->n == 5); + mu_assert("out nnz", out->nnz == 8); /* m0=4 * n0=2 */ + mu_assert("m0", out_pd->m0 == 4); + mu_assert("n0", out_pd->n0 == 2); + + int expected_row_perm[4] = {0, 1, 2, 3}; + mu_assert("row_perm", cmp_int_array(out_pd->row_perm, expected_row_perm, 4)); + int expected_col_perm[2] = {1, 3}; + mu_assert("col_perm", cmp_int_array(out_pd->col_perm, expected_col_perm, 2)); + + M->promote_fill_values(M, out); + + double expected_X[8] = {7.0, 9.0, 7.0, 9.0, 7.0, 9.0, 7.0, 9.0}; + mu_assert("values", cmp_double_array(out_pd->X, expected_X, 8)); + + free_matrix(out); + free_matrix(M); + return 0; +} + +/* PD broadcast_alloc / broadcast_fill_values, SCALAR variant. + (1, 5) PD with single dense row -> (d1*d2, 5) PD with that row tiled. 
*/ +const char *test_permuted_dense_broadcast_scalar(void) +{ + int row_perm[1] = {0}; + int col_perm[2] = {1, 3}; + double X[2] = {7.0, 9.0}; + matrix *M = new_permuted_dense(1, 5, 1, 2, row_perm, col_perm, X); + + int d1 = 2, d2 = 3; /* out shape (2, 3), m = 6 */ + matrix *out = M->broadcast_alloc(M, BROADCAST_SCALAR, d1, d2); + permuted_dense *out_pd = (permuted_dense *) out; + + mu_assert("out m", out->m == 6); + mu_assert("out n", out->n == 5); + mu_assert("m0", out_pd->m0 == 6); + mu_assert("n0", out_pd->n0 == 2); + int expected_rp[6] = {0, 1, 2, 3, 4, 5}; + mu_assert("row_perm", cmp_int_array(out_pd->row_perm, expected_rp, 6)); + + M->broadcast_fill_values(M, BROADCAST_SCALAR, d1, d2, out); + double expected_X[12] = {7, 9, 7, 9, 7, 9, 7, 9, 7, 9, 7, 9}; + mu_assert("values", cmp_double_array(out_pd->X, expected_X, 12)); + + free_matrix(out); + free_matrix(M); + return 0; +} + +/* PD broadcast_alloc / broadcast_fill_values, ROW variant. + (1, d2) input has Jacobian of shape (d2, n_vars). Source PD: m=d2=3, + row_perm={0, 2} (rows 0 and 2 dense), col_perm={1, 4}, single dense row + per m0. Output (d1, d2) = (2, 3): each child row replicated d1=2 + times. 
*/ +const char *test_permuted_dense_broadcast_row(void) +{ + int row_perm[2] = {0, 2}; + int col_perm[2] = {1, 4}; + double X[4] = {1.0, 2.0, /* row corresponding to child row 0 */ + 3.0, 4.0}; /* row corresponding to child row 2 */ + matrix *M = new_permuted_dense(3, 6, 2, 2, row_perm, col_perm, X); + + int d1 = 2, d2 = 3; /* output (2, 3), out m = 6 */ + matrix *out = M->broadcast_alloc(M, BROADCAST_ROW, d1, d2); + permuted_dense *out_pd = (permuted_dense *) out; + + mu_assert("out m", out->m == 6); + mu_assert("m0", out_pd->m0 == 4); /* d1 * 2 */ + mu_assert("n0", out_pd->n0 == 2); + /* row_perm = {child_row_perm[0]*d1, +1, child_row_perm[1]*d1, +1} + = {0, 1, 4, 5} */ + int expected_rp[4] = {0, 1, 4, 5}; + mu_assert("row_perm", cmp_int_array(out_pd->row_perm, expected_rp, 4)); + + M->broadcast_fill_values(M, BROADCAST_ROW, d1, d2, out); + /* each child row replicated d1 times */ + double expected_X[8] = {1.0, 2.0, 1.0, 2.0, 3.0, 4.0, 3.0, 4.0}; + mu_assert("values", cmp_double_array(out_pd->X, expected_X, 8)); + + free_matrix(out); + free_matrix(M); + return 0; +} + +/* PD broadcast_alloc / broadcast_fill_values, COL variant. + (d1, 1) input has Jacobian of shape (d1, n_vars). Source PD: m=d1=3, + row_perm={0, 2}, col_perm={1, 4}, two dense rows. Output (d1, d2) = (3, 2), + out m = 6: each child row appears d2 times, shifted by j*d1. 
*/ +const char *test_permuted_dense_broadcast_col(void) +{ + int row_perm[2] = {0, 2}; + int col_perm[2] = {1, 4}; + double X[4] = {1.0, 2.0, 3.0, 4.0}; + matrix *M = new_permuted_dense(3, 6, 2, 2, row_perm, col_perm, X); + + int d1 = 3, d2 = 2; + matrix *out = M->broadcast_alloc(M, BROADCAST_COL, d1, d2); + permuted_dense *out_pd = (permuted_dense *) out; + + mu_assert("out m", out->m == 6); + mu_assert("m0", out_pd->m0 == 4); /* d2 * 2 */ + mu_assert("n0", out_pd->n0 == 2); + /* row_perm = {0+0, 0+2, 3+0, 3+2} = {0, 2, 3, 5} */ + int expected_rp[4] = {0, 2, 3, 5}; + mu_assert("row_perm", cmp_int_array(out_pd->row_perm, expected_rp, 4)); + + M->broadcast_fill_values(M, BROADCAST_COL, d1, d2, out); + /* X = d2 copies of full source X block */ + double expected_X[8] = {1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0, 4.0}; + mu_assert("values", cmp_double_array(out_pd->X, expected_X, 8)); + + free_matrix(out); + free_matrix(M); + return 0; +} + +/* PD diag_vec_alloc / diag_vec_fill_values. + Source PD shape (3, 6) with m0=2 (rows 0 and 2) -> output PD shape + (9, 6) with the same 2 dense rows mapped to positions {0, 8} = {0*4, 2*4}. 
*/ +const char *test_permuted_dense_diag_vec(void) +{ + int row_perm[2] = {0, 2}; + int col_perm[2] = {1, 4}; + double X[4] = {1.0, 2.0, 3.0, 4.0}; + matrix *M = new_permuted_dense(3, 6, 2, 2, row_perm, col_perm, X); + + matrix *out = M->diag_vec_alloc(M); + permuted_dense *out_pd = (permuted_dense *) out; + + mu_assert("out m", out->m == 9); + mu_assert("out n", out->n == 6); + mu_assert("m0", out_pd->m0 == 2); + mu_assert("n0", out_pd->n0 == 2); + /* row_perm = {0*(n+1), 2*(n+1)} = {0, 8} */ + int expected_rp[2] = {0, 8}; + mu_assert("row_perm", cmp_int_array(out_pd->row_perm, expected_rp, 2)); + int expected_cp[2] = {1, 4}; + mu_assert("col_perm", cmp_int_array(out_pd->col_perm, expected_cp, 2)); + + M->diag_vec_fill_values(M, out); + /* X is identical to the source X */ + double expected_X[4] = {1.0, 2.0, 3.0, 4.0}; + mu_assert("values", cmp_double_array(out_pd->X, expected_X, 4)); + + free_matrix(out); + free_matrix(M); + return 0; +} + +/* ---- Helpers for BTA / BTDA tests ---- */ + +/* Scatter a PD into a dense m x n_global buffer (row-major), zero-filled. + Buffer is allocated by the caller. */ +static void scatter_pd_to_dense(const permuted_dense *pd, int n_global, + double *dense) +{ + int m = pd->base.m; + memset(dense, 0, (size_t) m * (size_t) n_global * sizeof(double)); + for (int ii = 0; ii < pd->m0; ii++) + { + int row = pd->row_perm[ii]; + for (int jj = 0; jj < pd->n0; jj++) + { + int col = pd->col_perm[jj]; + dense[row * n_global + col] = pd->X[ii * pd->n0 + jj]; + } + } +} + +/* BTA: A and B share row_perm = [1, 3]; both have m=4, distinct col_perms. + C = B^T A is computed via the primitive and compared against a hand + reference X_B^T X_A. */ +const char *test_permuted_dense_BTA_matching_row_perm(void) +{ + int row_perm[2] = {1, 3}; + int col_perm_A[2] = {0, 2}; + int col_perm_B[2] = {1, 3}; + /* X_A is (2, 2), X_B is (2, 2), both row-major. 
*/ + double XA[4] = {1.0, 2.0, 3.0, 4.0}; /* rows: [1,2], [3,4] */ + double XB[4] = {5.0, 6.0, 7.0, 8.0}; /* rows: [5,6], [7,8] */ + matrix *A_m = new_permuted_dense(4, 4, 2, 2, row_perm, col_perm_A, XA); + matrix *B_m = new_permuted_dense(4, 4, 2, 2, row_perm, col_perm_B, XB); + permuted_dense *A = (permuted_dense *) A_m; + permuted_dense *B = (permuted_dense *) B_m; + + matrix *C_m = BTA_pd_pd_alloc(B, A); + permuted_dense *C = (permuted_dense *) C_m; + + mu_assert("out m", C_m->m == 4); /* B.n */ + mu_assert("out n", C_m->n == 4); /* A.n */ + mu_assert("m0", C->m0 == 2); + mu_assert("n0", C->n0 == 2); + mu_assert("row_perm", cmp_int_array(C->row_perm, col_perm_B, 2)); + mu_assert("col_perm", cmp_int_array(C->col_perm, col_perm_A, 2)); + + BTA_pd_pd_fill_values(B, A, C); + + /* Reference: X_B^T X_A. With X_B = [[5,6],[7,8]], X_A = [[1,2],[3,4]]: + X_B^T = [[5,7],[6,8]]. X_B^T X_A = [[5*1+7*3, 5*2+7*4], [6*1+8*3, 6*2+8*4]] + = [[26, 38], [30, 44]]. */ + double expected[4] = {26.0, 38.0, 30.0, 44.0}; + mu_assert("values", cmp_double_array(C->X, expected, 4)); + + free_matrix(C_m); + free_matrix(B_m); + free_matrix(A_m); + return 0; +} + +/* BTA with empty row intersection: row_perm_A = [0, 2], row_perm_B = [1, 3]. + BTA_pd_pd_alloc should return an empty C (nnz = 0); the fill + kernels should short-circuit without crashing. 
*/ +const char *test_permuted_dense_BTA_empty_overlap(void) +{ + int row_perm_A[2] = {0, 2}; + int row_perm_B[2] = {1, 3}; + int col_perm_A[2] = {0, 2}; + int col_perm_B[2] = {1, 3}; + double XA[4] = {1.0, 2.0, 3.0, 4.0}; + double XB[4] = {5.0, 6.0, 7.0, 8.0}; + matrix *A_m = new_permuted_dense(4, 4, 2, 2, row_perm_A, col_perm_A, XA); + matrix *B_m = new_permuted_dense(4, 4, 2, 2, row_perm_B, col_perm_B, XB); + permuted_dense *A = (permuted_dense *) A_m; + permuted_dense *B = (permuted_dense *) B_m; + + matrix *C_m = BTA_pd_pd_alloc(B, A); + permuted_dense *C = (permuted_dense *) C_m; + + mu_assert("out m", C_m->m == 4); /* B.n */ + mu_assert("out n", C_m->n == 4); /* A.n */ + mu_assert("m0", C->m0 == 0); + mu_assert("n0", C->n0 == 0); + mu_assert("nnz", C_m->nnz == 0); + + /* fill kernels should be safe no-ops on empty C. */ + BTA_pd_pd_fill_values(B, A, C); + double d[4] = {1.0, 1.0, 1.0, 1.0}; + BTDA_pd_pd_fill_values(B, d, A, C); + + free_matrix(C_m); + free_matrix(B_m); + free_matrix(A_m); + return 0; +} + +/* BTA with partial overlap: row_perm_A = [1, 3, 5], row_perm_B = [3, 5, 7]. + Intersection = {3, 5}. */ +const char *test_permuted_dense_BTA_partial_overlap(void) +{ + int row_perm_A[3] = {1, 3, 5}; + int row_perm_B[3] = {3, 5, 7}; + int col_perm_A[2] = {0, 2}; + int col_perm_B[2] = {1, 3}; + /* X_A rows correspond to A row_perm order: row 0 -> source row 1, row 1 -> 3, + * row 2 -> 5. */ + double XA[6] = {1.0, 2.0, /* row 1 (NOT in B) */ + 3.0, 4.0, /* row 3 (in B at pos 0) */ + 5.0, 6.0}; /* row 5 (in B at pos 1) */ + /* X_B rows: row 0 -> source row 3, row 1 -> 5, row 2 -> 7. 
*/ + double XB[6] = {10.0, 20.0, /* row 3 (in A at pos 1) */ + 30.0, 40.0, /* row 5 (in A at pos 2) */ + 50.0, 60.0}; /* row 7 (NOT in A) */ + matrix *A_m = new_permuted_dense(8, 4, 3, 2, row_perm_A, col_perm_A, XA); + matrix *B_m = new_permuted_dense(8, 4, 3, 2, row_perm_B, col_perm_B, XB); + permuted_dense *A = (permuted_dense *) A_m; + permuted_dense *B = (permuted_dense *) B_m; + + matrix *C_m = BTA_pd_pd_alloc(B, A); + permuted_dense *C = (permuted_dense *) C_m; + BTA_pd_pd_fill_values(B, A, C); + + /* Reference: scatter A, B to dense 8x4, compute B^T A, compare block at + (col_perm_B, col_perm_A). */ + double *A_d = (double *) calloc((size_t) 8 * 4, sizeof(double)); + double *B_d = (double *) calloc((size_t) 8 * 4, sizeof(double)); + scatter_pd_to_dense(A, 4, A_d); + scatter_pd_to_dense(B, 4, B_d); + + /* Reference C_ref is 4x4 = B_d^T (4x8) * A_d (8x4). */ + double C_ref[16]; + memset(C_ref, 0, sizeof C_ref); + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + double s = 0.0; + for (int k = 0; k < 8; k++) + { + s += B_d[k * 4 + i] * A_d[k * 4 + j]; + } + C_ref[i * 4 + j] = s; + } + } + + /* Extract reference block at (col_perm_B, col_perm_A) and compare to C->X. */ + double expected[4]; + for (int ii = 0; ii < 2; ii++) + { + for (int jj = 0; jj < 2; jj++) + { + expected[ii * 2 + jj] = C_ref[col_perm_B[ii] * 4 + col_perm_A[jj]]; + } + } + mu_assert("values", cmp_double_array(C->X, expected, 4)); + + free(A_d); + free(B_d); + free_matrix(C_m); + free_matrix(B_m); + free_matrix(A_m); + return 0; +} + +/* Full BTDA decomposition: tmp = diag(w) A; C = B^T tmp. Compare against a + dense triple product B_d^T diag(w) A_d. 
*/ +const char *test_permuted_dense_BTDA_decomposition(void) +{ + int row_perm[3] = {0, 1, 2}; + int col_perm_A[2] = {0, 2}; + int col_perm_B[2] = {1, 3}; + double XA[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + double XB[6] = {7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; + double w[3] = {2.0, -1.0, 3.0}; + + matrix *A_m = new_permuted_dense(3, 4, 3, 2, row_perm, col_perm_A, XA); + matrix *B_m = new_permuted_dense(3, 4, 3, 2, row_perm, col_perm_B, XB); + permuted_dense *A = (permuted_dense *) A_m; + permuted_dense *B = (permuted_dense *) B_m; + + /* tmp has the same sparsity as A. */ + matrix *tmp_m = A_m->copy_sparsity(A_m); + permuted_dense *tmp = (permuted_dense *) tmp_m; + DA_pd_fill_values(w, A, tmp); + + matrix *C_m = BTA_pd_pd_alloc(B, tmp); + permuted_dense *C = (permuted_dense *) C_m; + BTA_pd_pd_fill_values(B, tmp, C); + + /* Reference: dense B_d^T diag(w) A_d, extract (col_perm_B, col_perm_A) block. */ + double *A_d = (double *) calloc((size_t) 3 * 4, sizeof(double)); + double *B_d = (double *) calloc((size_t) 3 * 4, sizeof(double)); + scatter_pd_to_dense(A, 4, A_d); + scatter_pd_to_dense(B, 4, B_d); + + double C_ref[16]; + memset(C_ref, 0, sizeof C_ref); + for (int i = 0; i < 4; i++) + { + for (int j = 0; j < 4; j++) + { + double s = 0.0; + for (int k = 0; k < 3; k++) + { + s += B_d[k * 4 + i] * w[k] * A_d[k * 4 + j]; + } + C_ref[i * 4 + j] = s; + } + } + double expected[4]; + for (int ii = 0; ii < 2; ii++) + { + for (int jj = 0; jj < 2; jj++) + { + expected[ii * 2 + jj] = C_ref[col_perm_B[ii] * 4 + col_perm_A[jj]]; + } + } + mu_assert("values", cmp_double_array(C->X, expected, 4)); + + free(A_d); + free(B_d); + free_matrix(C_m); + free_matrix(tmp_m); + free_matrix(B_m); + free_matrix(A_m); + return 0; +} + +/* BTA(CSR_matrix A, PD B): basic correctness against a dense reference. + A is (4, 5) CSR_matrix with mixed sparsity; B is (4, 4) PD with row_perm = [1, 3], + col_perm = [0, 2], dense block (2, 2). 
*/ +/* BTA_pd_csc_alloc + BTDA_pd_csc_fill_values should match the legacy + CSR-pd kernels in old-code on both alloc structure and BTDA values. + Uses a d with negative + zero entries to exercise sign / drop paths. */ +const char *test_BTA_pd_csc_matches_csr(void) +{ + /* Same A and B as test_BTA_pd_csr_basic. */ + CSR_matrix *A_csr = new_CSR_matrix(4, 5, 7); + A_csr->p[0] = 0; + A_csr->p[1] = 2; + A_csr->p[2] = 4; + A_csr->p[3] = 5; + A_csr->p[4] = 7; + int Ai[7] = {1, 4, 0, 2, 2, 1, 4}; + double Ax[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; + memcpy(A_csr->i, Ai, sizeof Ai); + memcpy(A_csr->x, Ax, sizeof Ax); + + int *iwork = (int *) malloc(MAX(A_csr->m, A_csr->n) * sizeof(int)); + CSC_matrix *A_csc = csr_to_csc_alloc(A_csr, iwork); + csr_to_csc_fill_values(A_csr, A_csc, iwork); + + int row_perm_B[2] = {1, 3}; + int col_perm_B[2] = {0, 2}; + double XB[4] = {10.0, 20.0, 30.0, 40.0}; + matrix *B_m = new_permuted_dense(4, 4, 2, 2, row_perm_B, col_perm_B, XB); + permuted_dense *B = (permuted_dense *) B_m; + + double d[4] = {1.5, -2.0, 0.0, 3.5}; + + /* CSR variant (baseline, from old-code). */ + matrix *C_csr_m = BTA_pd_csr_alloc(B, A_csr); + permuted_dense *C_csr = (permuted_dense *) C_csr_m; + BTDA_pd_csr_fill_values(B, d, A_csr, C_csr); + + /* CSC variant (under test). */ + matrix *C_csc_m = BTA_pd_csc_alloc(B, A_csc); + permuted_dense *C_csc = (permuted_dense *) C_csc_m; + BTDA_pd_csc_fill_values(B, d, A_csc, C_csc); + + /* Structural equality. */ + mu_assert("m matches", C_csc_m->m == C_csr_m->m); + mu_assert("n matches", C_csc_m->n == C_csr_m->n); + mu_assert("m0 matches", C_csc->m0 == C_csr->m0); + mu_assert("n0 matches", C_csc->n0 == C_csr->n0); + mu_assert("row_perm matches", + cmp_int_array(C_csc->row_perm, C_csr->row_perm, C_csr->m0)); + mu_assert("col_perm matches", + cmp_int_array(C_csc->col_perm, C_csr->col_perm, C_csr->n0)); + + /* Value equality (tolerance-based; dot ordering differs vs dgemm). 
*/ + mu_assert("BTDA values match", + cmp_double_array(C_csc->X, C_csr->X, C_csr->m0 * C_csr->n0)); + + free_matrix(C_csr_m); + free_matrix(C_csc_m); + free_matrix(B_m); + free_CSC_matrix(A_csc); + free_CSR_matrix(A_csr); + free(iwork); + return 0; +} + +/* BA_pd_matrices: C = B @ A where B is full-block PD (the production + shape gated by left_matmul.c) and A is PD with non-trivial perms. + B (2x3) row_perm=[0,1], col_perm=[0,1,2], X_B=[[1,2,3],[4,5,6]]. + A (3x5) row_perm=[0,2], col_perm=[1,4], X_A=[[7,8],[9,10]]. + Hand-computed C (2x5) nonzero at cols {1,4}: X_C=[[34,38],[82,92]]. */ +const char *test_BA_pd_matrices_pd_pd_full_block_B(void) +{ + int row_perm_B[2] = {0, 1}; + int col_perm_B[3] = {0, 1, 2}; + double XB[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; + matrix *B_m = new_permuted_dense(2, 3, 2, 3, row_perm_B, col_perm_B, XB); + + int row_perm_A[2] = {0, 2}; + int col_perm_A[2] = {1, 4}; + double XA[4] = {7.0, 8.0, 9.0, 10.0}; + matrix *A_m = new_permuted_dense(3, 5, 2, 2, row_perm_A, col_perm_A, XA); + + matrix *C_m = BA_pd_matrices_alloc((permuted_dense *) B_m, A_m); + BA_pd_matrices_fill_values((permuted_dense *) B_m, A_m, (permuted_dense *) C_m); + + permuted_dense *C = (permuted_dense *) C_m; + mu_assert("dim m", C_m->m == 2); + mu_assert("dim n", C_m->n == 5); + mu_assert("m0", C->m0 == 2); + mu_assert("n0", C->n0 == 2); + int expected_row_perm[2] = {0, 1}; + int expected_col_perm[2] = {1, 4}; + mu_assert("row_perm", cmp_int_array(C->row_perm, expected_row_perm, 2)); + mu_assert("col_perm", cmp_int_array(C->col_perm, expected_col_perm, 2)); + double expected_X[4] = {34.0, 38.0, 82.0, 92.0}; + mu_assert("X", cmp_double_array(C->X, expected_X, 4)); + + free_matrix(C_m); + free_matrix(A_m); + free_matrix(B_m); + return 0; +} + +/* BA_pd_matrices with general (non-full-block) B. B->col_perm and + A->row_perm only partially overlap, exercising the + sorted_intersect_indices gather path. + B (2x5) row_perm=[0,1], col_perm=[1,3], X_B=[[1,2],[3,4]]. 
+ A (5x4) row_perm=[1,2], col_perm=[0,3], X_A=[[5,6],[7,8]]. + Intersection K = {1,3} ∩ {1,2} = {1}, s=1. + Hand-computed C (2x4) nonzero at cols {0,3}: X_C=[[5,6],[15,18]]. */ +const char *test_BA_pd_matrices_pd_pd_general_B(void) +{ + int row_perm_B[2] = {0, 1}; + int col_perm_B[2] = {1, 3}; + double XB[4] = {1.0, 2.0, 3.0, 4.0}; + matrix *B_m = new_permuted_dense(2, 5, 2, 2, row_perm_B, col_perm_B, XB); + + int row_perm_A[2] = {1, 2}; + int col_perm_A[2] = {0, 3}; + double XA[4] = {5.0, 6.0, 7.0, 8.0}; + matrix *A_m = new_permuted_dense(5, 4, 2, 2, row_perm_A, col_perm_A, XA); + + matrix *C_m = BA_pd_matrices_alloc((permuted_dense *) B_m, A_m); + BA_pd_matrices_fill_values((permuted_dense *) B_m, A_m, (permuted_dense *) C_m); + + permuted_dense *C = (permuted_dense *) C_m; + mu_assert("dim m", C_m->m == 2); + mu_assert("dim n", C_m->n == 4); + mu_assert("m0", C->m0 == 2); + mu_assert("n0", C->n0 == 2); + int expected_row_perm[2] = {0, 1}; + int expected_col_perm[2] = {0, 3}; + mu_assert("row_perm", cmp_int_array(C->row_perm, expected_row_perm, 2)); + mu_assert("col_perm", cmp_int_array(C->col_perm, expected_col_perm, 2)); + double expected_X[4] = {5.0, 6.0, 15.0, 18.0}; + mu_assert("X", cmp_double_array(C->X, expected_X, 4)); + + free_matrix(C_m); + free_matrix(A_m); + free_matrix(B_m); + return 0; +} + +/* BA_pd_matrices with sparse A. Same B and same global A content as the + pd_pd_general_B test — the dispatcher routes through BA_pd_csc_* + and should yield byte-identical output. */ +const char *test_BA_pd_matrices_pd_csc(void) +{ + int row_perm_B[2] = {0, 1}; + int col_perm_B[2] = {1, 3}; + double XB[4] = {1.0, 2.0, 3.0, 4.0}; + matrix *B_m = new_permuted_dense(2, 5, 2, 2, row_perm_B, col_perm_B, XB); + + /* A as 5x4 sparse_matrix, same nonzero values as the PD case: + (1,0)=5, (1,3)=6, (2,0)=7, (2,3)=8. 
*/ + CSR_matrix *csr = new_CSR_matrix(5, 4, 4); + int Ap[6] = {0, 0, 2, 4, 4, 4}; + int Ai[4] = {0, 3, 0, 3}; + double Ax[4] = {5.0, 6.0, 7.0, 8.0}; + memcpy(csr->p, Ap, 6 * sizeof(int)); + memcpy(csr->i, Ai, 4 * sizeof(int)); + memcpy(csr->x, Ax, 4 * sizeof(double)); + matrix *A_m = new_sparse_matrix(csr); + + matrix *C_m = BA_pd_matrices_alloc((permuted_dense *) B_m, A_m); + A_m->refresh_csc_values(A_m); /* values must be fresh before fill */ + BA_pd_matrices_fill_values((permuted_dense *) B_m, A_m, (permuted_dense *) C_m); + + permuted_dense *C = (permuted_dense *) C_m; + mu_assert("dim m", C_m->m == 2); + mu_assert("dim n", C_m->n == 4); + mu_assert("m0", C->m0 == 2); + mu_assert("n0", C->n0 == 2); + int expected_row_perm[2] = {0, 1}; + int expected_col_perm[2] = {0, 3}; + mu_assert("row_perm", cmp_int_array(C->row_perm, expected_row_perm, 2)); + mu_assert("col_perm", cmp_int_array(C->col_perm, expected_col_perm, 2)); + double expected_X[4] = {5.0, 6.0, 15.0, 18.0}; + mu_assert("X", cmp_double_array(C->X, expected_X, 4)); + + free_matrix(C_m); + free_matrix(A_m); + free_matrix(B_m); + return 0; +} + +/* BA_pd_matrices fast path: B->col_perm == A->row_perm exactly, so the + slow-path gather is skipped and one cblas_dgemm runs directly on + B->X and A->X. + B (2x4) row_perm=[0,1], col_perm=[1,3], X_B=[[1,2],[3,4]]. + A (4x3) row_perm=[1,3], col_perm=[0,2], X_A=[[5,6],[7,8]]. + Matching col_perm_B == row_perm_A == [1,3] triggers the fast path. + Hand-computed C (2x3) nonzero at cols {0,2}: X_C=[[19,22],[43,50]]. 
*/ +const char *test_BA_pd_matrices_fast_path(void) +{ + int row_perm_B[2] = {0, 1}; + int col_perm_B[2] = {1, 3}; + double XB[4] = {1.0, 2.0, 3.0, 4.0}; + matrix *B_m = new_permuted_dense(2, 4, 2, 2, row_perm_B, col_perm_B, XB); + + int row_perm_A[2] = {1, 3}; + int col_perm_A[2] = {0, 2}; + double XA[4] = {5.0, 6.0, 7.0, 8.0}; + matrix *A_m = new_permuted_dense(4, 3, 2, 2, row_perm_A, col_perm_A, XA); + + matrix *C_m = BA_pd_matrices_alloc((permuted_dense *) B_m, A_m); + BA_pd_matrices_fill_values((permuted_dense *) B_m, A_m, (permuted_dense *) C_m); + + permuted_dense *C = (permuted_dense *) C_m; + mu_assert("dim m", C_m->m == 2); + mu_assert("dim n", C_m->n == 3); + mu_assert("m0", C->m0 == 2); + mu_assert("n0", C->n0 == 2); + int expected_row_perm[2] = {0, 1}; + int expected_col_perm[2] = {0, 2}; + mu_assert("row_perm", cmp_int_array(C->row_perm, expected_row_perm, 2)); + mu_assert("col_perm", cmp_int_array(C->col_perm, expected_col_perm, 2)); + double expected_X[4] = {19.0, 22.0, 43.0, 50.0}; + mu_assert("X", cmp_double_array(C->X, expected_X, 4)); + + free_matrix(C_m); + free_matrix(A_m); + free_matrix(B_m); + return 0; +} + +#endif /* TEST_PERMUTED_DENSE_H */ diff --git a/tests/wsum_hess/affine/test_broadcast.h b/tests/wsum_hess/affine/test_broadcast.h index 3d36cb4..6635a7d 100644 --- a/tests/wsum_hess/affine/test_broadcast.h +++ b/tests/wsum_hess/affine/test_broadcast.h @@ -56,12 +56,9 @@ const char *test_wsum_hess_broadcast_row(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("broadcast row wsum_hess: x values fail", - cmp_double_array(bcast->wsum_hess->x, expected_x, 3)); - mu_assert("broadcast row wsum_hess: row pointers fail", - cmp_int_array(bcast->wsum_hess->p, expected_p, 4)); - mu_assert("broadcast row wsum_hess: column indices fail", - cmp_int_array(bcast->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(bcast->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + 
cmp_sparsity(bcast->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(bcast); return 0; @@ -111,12 +108,9 @@ const char *test_wsum_hess_broadcast_col(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("broadcast col wsum_hess: x values fail", - cmp_double_array(bcast->wsum_hess->x, expected_x, 3)); - mu_assert("broadcast col wsum_hess: row pointers fail", - cmp_int_array(bcast->wsum_hess->p, expected_p, 4)); - mu_assert("broadcast col wsum_hess: column indices fail", - cmp_int_array(bcast->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(bcast->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(bcast->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(bcast); return 0; @@ -159,12 +153,9 @@ const char *test_wsum_hess_broadcast_scalar_to_matrix(void) int expected_p[2] = {0, 1}; int expected_i[1] = {0}; - mu_assert("broadcast scalar wsum_hess: x values fail", - cmp_double_array(bcast->wsum_hess->x, expected_x, 1)); - mu_assert("broadcast scalar wsum_hess: row pointers fail", - cmp_int_array(bcast->wsum_hess->p, expected_p, 2)); - mu_assert("broadcast scalar wsum_hess: column indices fail", - cmp_int_array(bcast->wsum_hess->i, expected_i, 1)); + mu_assert("vals fail", cmp_values(bcast->wsum_hess, expected_x, 1)); + mu_assert("sparsity fail", + cmp_sparsity(bcast->wsum_hess, expected_p, expected_i, 1, 1)); free_expr(bcast); return 0; diff --git a/tests/wsum_hess/affine/test_diag_mat.h b/tests/wsum_hess/affine/test_diag_mat.h index aec571a..2774d5f 100644 --- a/tests/wsum_hess/affine/test_diag_mat.h +++ b/tests/wsum_hess/affine/test_diag_mat.h @@ -36,10 +36,9 @@ const char *test_wsum_hess_diag_mat_log(void) int expected_p[5] = {0, 1, 2, 3, 4}; int expected_i[4] = {0, 1, 2, 3}; - mu_assert("diag_mat log hess vals", - cmp_double_array(dm->wsum_hess->x, expected_x, 4)); - mu_assert("diag_mat log hess p", cmp_int_array(dm->wsum_hess->p, expected_p, 5)); - mu_assert("diag_mat log hess i", 
cmp_int_array(dm->wsum_hess->i, expected_i, 4)); + mu_assert("vals fail", cmp_values(dm->wsum_hess, expected_x, 4)); + mu_assert("sparsity fail", + cmp_sparsity(dm->wsum_hess, expected_p, expected_i, 4, 4)); free_expr(dm); return 0; diff --git a/tests/wsum_hess/affine/test_hstack.h b/tests/wsum_hess/affine/test_hstack.h index 78c8160..49809a9 100644 --- a/tests/wsum_hess/affine/test_hstack.h +++ b/tests/wsum_hess/affine/test_hstack.h @@ -90,12 +90,9 @@ const char *test_wsum_hess_hstack(void) int expected_p[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; int expected_i[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - mu_assert("vals incorrect", - cmp_double_array(hstack_node->wsum_hess->x, expected_x, 9)); - mu_assert("rows incorrect", - cmp_int_array(hstack_node->wsum_hess->p, expected_p, 10)); - mu_assert("cols incorrect", - cmp_int_array(hstack_node->wsum_hess->i, expected_i, 9)); + mu_assert("vals fail", cmp_values(hstack_node->wsum_hess, expected_x, 9)); + mu_assert("sparsity fail", + cmp_sparsity(hstack_node->wsum_hess, expected_p, expected_i, 9, 9)); free_expr(hstack_node); return 0; @@ -200,12 +197,9 @@ const char *test_wsum_hess_hstack_matrix(void) int expected_i[18] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17}; - mu_assert("vals incorrect", - cmp_double_array(hstack_node->wsum_hess->x, expected_x, 18)); - mu_assert("rows incorrect", - cmp_int_array(hstack_node->wsum_hess->p, expected_p, 19)); - mu_assert("cols incorrect", - cmp_int_array(hstack_node->wsum_hess->i, expected_i, 18)); + mu_assert("vals fail", cmp_values(hstack_node->wsum_hess, expected_x, 18)); + mu_assert("sparsity fail", + cmp_sparsity(hstack_node->wsum_hess, expected_p, expected_i, 18, 18)); free_expr(hstack_node); return 0; diff --git a/tests/wsum_hess/affine/test_index.h b/tests/wsum_hess/affine/test_index.h index 3aa2030..6d77a39 100644 --- a/tests/wsum_hess/affine/test_index.h +++ b/tests/wsum_hess/affine/test_index.h @@ -37,10 +37,9 @@ const char *test_wsum_hess_index_log(void) int 
expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("index log hess vals", - cmp_double_array(idx->wsum_hess->x, expected_x, 3)); - mu_assert("index log hess p", cmp_int_array(idx->wsum_hess->p, expected_p, 4)); - mu_assert("index log hess i", cmp_int_array(idx->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(idx->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(idx->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(idx); return 0; @@ -70,12 +69,9 @@ const char *test_wsum_hess_index_repeated(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("index repeated hess vals", - cmp_double_array(idx->wsum_hess->x, expected_x, 3)); - mu_assert("index repeated hess p", - cmp_int_array(idx->wsum_hess->p, expected_p, 4)); - mu_assert("index repeated hess i", - cmp_int_array(idx->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(idx->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(idx->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(idx); return 0; @@ -109,12 +105,9 @@ const char *test_wsum_hess_sum_index_log(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("sum index log hess vals", - cmp_double_array(sum_node->wsum_hess->x, expected_x, 3)); - mu_assert("sum index log hess p", - cmp_int_array(sum_node->wsum_hess->p, expected_p, 4)); - mu_assert("sum index log hess i", - cmp_int_array(sum_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(sum_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(sum_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(sum_node); return 0; diff --git a/tests/wsum_hess/affine/test_left_matmul.h b/tests/wsum_hess/affine/test_left_matmul.h index 313624a..4869dba 100644 --- a/tests/wsum_hess/affine/test_left_matmul.h +++ b/tests/wsum_hess/affine/test_left_matmul.h @@ -53,8 +53,8 @@ const char 
*test_wsum_hess_left_matmul(void) expr *x = new_variable(3, 1, 0, 3); - /* Create sparse matrix A in CSR format */ - CSR_Matrix *A = new_csr_matrix(4, 3, 7); + /* Create sparse matrix A in CSR_matrix format */ + CSR_matrix *A = new_CSR_matrix(4, 3, 7); int A_p[5] = {0, 2, 4, 6, 7}; int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; @@ -80,12 +80,11 @@ const char *test_wsum_hess_left_matmul(void) int expected_i[3] = {0, 1, 2}; int expected_p[4] = {0, 1, 2, 3}; /* each row has 1 diagonal entry */ - mu_assert("vals incorrect", - cmp_double_array(A_log_x->wsum_hess->x, expected_x, 3)); - mu_assert("cols incorrect", cmp_int_array(A_log_x->wsum_hess->i, expected_i, 3)); - mu_assert("rows incorrect", cmp_int_array(A_log_x->wsum_hess->p, expected_p, 4)); + mu_assert("vals fail", cmp_values(A_log_x->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(A_log_x->wsum_hess, expected_p, expected_i, 3, 3)); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(A_log_x); return 0; } @@ -99,7 +98,7 @@ const char *test_wsum_hess_left_matmul_exp_composite(void) expr *x = new_variable(3, 1, 0, 3); /* Create B matrix (3x3 all ones) */ - CSR_Matrix *B = new_csr_matrix(3, 3, 9); + CSR_matrix *B = new_CSR_matrix(3, 3, 9); int B_p[4] = {0, 3, 6, 9}; int B_i[9] = {0, 1, 2, 0, 1, 2, 0, 1, 2}; double B_x[9] = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; @@ -108,7 +107,7 @@ const char *test_wsum_hess_left_matmul_exp_composite(void) memcpy(B->x, B_x, 9 * sizeof(double)); /* Create A matrix */ - CSR_Matrix *A = new_csr_matrix(4, 3, 7); + CSR_matrix *A = new_CSR_matrix(4, 3, 7); int A_p[5] = {0, 2, 4, 6, 7}; int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; @@ -123,8 +122,8 @@ const char *test_wsum_hess_left_matmul_exp_composite(void) mu_assert("check_wsum_hess failed", check_wsum_hess(A_exp_Bx, x_vals, w, NUMERICAL_DIFF_DEFAULT_H)); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); 
+ free_CSR_matrix(B); free_expr(A_exp_Bx); return 0; } @@ -160,8 +159,8 @@ const char *test_wsum_hess_left_matmul_matrix(void) expr *x = new_variable(3, 2, 0, 6); - /* Create sparse matrix A in CSR format */ - CSR_Matrix *A = new_csr_matrix(4, 3, 7); + /* Create sparse matrix A in CSR_matrix format */ + CSR_matrix *A = new_CSR_matrix(4, 3, 7); int A_p[5] = {0, 2, 4, 6, 7}; int A_i[7] = {0, 2, 0, 2, 0, 2, 0}; double A_x[7] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}; @@ -189,12 +188,34 @@ const char *test_wsum_hess_left_matmul_matrix(void) int expected_i[6] = {0, 1, 2, 3, 4, 5}; int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; /* each row has 1 diagonal entry */ - mu_assert("vals incorrect", - cmp_double_array(A_log_x->wsum_hess->x, expected_x, 6)); - mu_assert("cols incorrect", cmp_int_array(A_log_x->wsum_hess->i, expected_i, 6)); - mu_assert("rows incorrect", cmp_int_array(A_log_x->wsum_hess->p, expected_p, 7)); + mu_assert("vals fail", cmp_values(A_log_x->wsum_hess, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(A_log_x->wsum_hess, expected_p, expected_i, 6, 6)); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(A_log_x); return 0; } + +/* Regression test for the Phase 3 transpose_fill_values omission in + new_left_matmul_dense. Mirrors the Python failure + cp.sum(A @ cp.exp(X)) with X a (2,2) Variable and A a 2x2 numpy array. + eval_wsum_hess reads lnode->AT->X via AT->block_left_mult_vec; before the + fix, AT->X was uninitialized memory (transpose_alloc allocates without + filling) and the analytic Hessian disagreed with finite differences. 
*/ +const char *test_wsum_hess_left_matmul_dense_matrix_exp(void) +{ + double x_vals[4] = {0.5, -0.3, 0.7, -0.2}; + double w[4] = {1.0, 1.0, 1.0, 1.0}; /* cp.sum: unit weight everywhere */ + double A_data[4] = {1.0, 2.0, 3.0, 4.0}; + + expr *X = new_variable(2, 2, 0, 4); + expr *exp_X = new_exp(X); + expr *A_exp_X = new_left_matmul_dense(NULL, exp_X, 2, 2, A_data); + + mu_assert("check_wsum_hess failed", + check_wsum_hess(A_exp_X, x_vals, w, NUMERICAL_DIFF_DEFAULT_H)); + + free_expr(A_exp_X); + return 0; +} diff --git a/tests/wsum_hess/affine/test_right_matmul.h b/tests/wsum_hess/affine/test_right_matmul.h index 2f51b9b..661e8e0 100644 --- a/tests/wsum_hess/affine/test_right_matmul.h +++ b/tests/wsum_hess/affine/test_right_matmul.h @@ -23,8 +23,8 @@ const char *test_wsum_hess_right_matmul(void) expr *x = new_variable(2, 2, 0, 4); - /* Create sparse matrix A in CSR format (2x3) */ - CSR_Matrix *A = new_csr_matrix(2, 3, 4); + /* Create sparse matrix A in CSR_matrix format (2x3) */ + CSR_matrix *A = new_CSR_matrix(2, 3, 4); int A_p[3] = {0, 2, 4}; int A_i[4] = {0, 2, 0, 2}; double A_x[4] = {1.0, 2.0, 3.0, 4.0}; @@ -50,12 +50,11 @@ const char *test_wsum_hess_right_matmul(void) int expected_i[4] = {0, 1, 2, 3}; int expected_p[5] = {0, 1, 2, 3, 4}; /* each row has 1 diagonal entry */ - mu_assert("vals incorrect", - cmp_double_array(log_x_A->wsum_hess->x, expected_x, 4)); - mu_assert("cols incorrect", cmp_int_array(log_x_A->wsum_hess->i, expected_i, 4)); - mu_assert("rows incorrect", cmp_int_array(log_x_A->wsum_hess->p, expected_p, 5)); + mu_assert("vals fail", cmp_values(log_x_A->wsum_hess, expected_x, 4)); + mu_assert("sparsity fail", + cmp_sparsity(log_x_A->wsum_hess, expected_p, expected_i, 4, 4)); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(log_x_A); return 0; } @@ -73,8 +72,8 @@ const char *test_wsum_hess_right_matmul_vector(void) expr *x = new_variable(1, 3, 0, 3); - /* Create sparse matrix A in CSR format (3x2) */ - CSR_Matrix *A = new_csr_matrix(3, 2, 
4); + /* Create sparse matrix A in CSR_matrix format (3x2) */ + CSR_matrix *A = new_CSR_matrix(3, 2, 4); int A_p[4] = {0, 1, 3, 4}; int A_i[4] = {0, 0, 1, 1}; double A_x[4] = {1.0, 2.0, 3.0, 4.0}; @@ -99,12 +98,11 @@ const char *test_wsum_hess_right_matmul_vector(void) int expected_i[3] = {0, 1, 2}; int expected_p[4] = {0, 1, 2, 3}; /* each row has 1 diagonal entry */ - mu_assert("vals incorrect", - cmp_double_array(log_x_A->wsum_hess->x, expected_x, 3)); - mu_assert("cols incorrect", cmp_int_array(log_x_A->wsum_hess->i, expected_i, 3)); - mu_assert("rows incorrect", cmp_int_array(log_x_A->wsum_hess->p, expected_p, 4)); + mu_assert("vals fail", cmp_values(log_x_A->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(log_x_A->wsum_hess, expected_p, expected_i, 3, 3)); - free_csr_matrix(A); + free_CSR_matrix(A); free_expr(log_x_A); return 0; } diff --git a/tests/wsum_hess/affine/test_scalar_mult.h b/tests/wsum_hess/affine/test_scalar_mult.h index ba6bae9..d08e134 100644 --- a/tests/wsum_hess/affine/test_scalar_mult.h +++ b/tests/wsum_hess/affine/test_scalar_mult.h @@ -38,12 +38,9 @@ const char *test_wsum_hess_scalar_mult_log_vector(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("scalar mult log hess: x values fail", - cmp_double_array(y->wsum_hess->x, expected_x, 3)); - mu_assert("scalar mult log hess: row pointers fail", - cmp_int_array(y->wsum_hess->p, expected_p, 4)); - mu_assert("scalar mult log hess: column indices fail", - cmp_int_array(y->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(y->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(y->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(y); return 0; @@ -76,12 +73,9 @@ const char *test_wsum_hess_scalar_mult_log_matrix(void) int expected_p[5] = {0, 1, 2, 3, 4}; int expected_i[4] = {0, 1, 2, 3}; - mu_assert("scalar mult log hess matrix: x values fail", - cmp_double_array(y->wsum_hess->x, expected_x, 4)); - 
mu_assert("scalar mult log hess matrix: row pointers fail", - cmp_int_array(y->wsum_hess->p, expected_p, 5)); - mu_assert("scalar mult log hess matrix: column indices fail", - cmp_int_array(y->wsum_hess->i, expected_i, 4)); + mu_assert("vals fail", cmp_values(y->wsum_hess, expected_x, 4)); + mu_assert("sparsity fail", + cmp_sparsity(y->wsum_hess, expected_p, expected_i, 4, 4)); free_expr(y); return 0; diff --git a/tests/wsum_hess/affine/test_sum.h b/tests/wsum_hess/affine/test_sum.h index 8ade78b..2c739c9 100644 --- a/tests/wsum_hess/affine/test_sum.h +++ b/tests/wsum_hess/affine/test_sum.h @@ -16,7 +16,7 @@ const char *test_wsum_hess_sum_exp_linear(void) double Ax[6] = {1, 1, 2, 3, 1, -1}; int Ai[6] = {0, 1, 0, 1, 0, 1}; int Ap[4] = {0, 2, 4, 6}; - CSR_Matrix *A = new_csr_matrix(3, 2, 6); + CSR_matrix *A = new_CSR_matrix(3, 2, 6); memcpy(A->x, Ax, 6 * sizeof(double)); memcpy(A->i, Ai, 6 * sizeof(int)); memcpy(A->p, Ap, 4 * sizeof(int)); @@ -32,7 +32,7 @@ const char *test_wsum_hess_sum_exp_linear(void) check_wsum_hess(sum_node, x_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); free_expr(sum_node); - free_csr_matrix(A); + free_CSR_matrix(A); return 0; } @@ -63,12 +63,9 @@ const char *test_wsum_hess_sum_log_axis0(void) int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; int expected_i[6] = {0, 1, 2, 3, 4, 5}; - mu_assert("vals incorrect", - cmp_double_array(sum_node->wsum_hess->x, expected_x, 6)); - mu_assert("rows incorrect", - cmp_int_array(sum_node->wsum_hess->p, expected_p, 7)); - mu_assert("cols incorrect", - cmp_int_array(sum_node->wsum_hess->i, expected_i, 6)); + mu_assert("vals fail", cmp_values(sum_node->wsum_hess, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(sum_node->wsum_hess, expected_p, expected_i, 6, 6)); free_expr(sum_node); @@ -101,12 +98,9 @@ const char *test_wsum_hess_sum_log_axis1(void) int expected_p[7] = {0, 1, 2, 3, 4, 5, 6}; int expected_i[6] = {0, 1, 2, 3, 4, 5}; - mu_assert("vals incorrect", - cmp_double_array(sum_node->wsum_hess->x, 
expected_x, 6)); - mu_assert("rows incorrect", - cmp_int_array(sum_node->wsum_hess->p, expected_p, 7)); - mu_assert("cols incorrect", - cmp_int_array(sum_node->wsum_hess->i, expected_i, 6)); + mu_assert("vals fail", cmp_values(sum_node->wsum_hess, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(sum_node->wsum_hess, expected_p, expected_i, 6, 6)); free_expr(sum_node); diff --git a/tests/wsum_hess/affine/test_trace.h b/tests/wsum_hess/affine/test_trace.h index bb3f98c..bfbb5e0 100644 --- a/tests/wsum_hess/affine/test_trace.h +++ b/tests/wsum_hess/affine/test_trace.h @@ -70,11 +70,9 @@ const char *test_wsum_hess_trace_log_variable(void) int expected_Ap[14] = {0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 9}; int expected_Ai[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - CSR_Matrix *H = trace_node->wsum_hess; - mu_assert("nnz wrong", H->nnz == 9); - mu_assert("vals match", cmp_double_array(H->x, expected_Ax, 9)); - mu_assert("cols match", cmp_int_array(H->i, expected_Ai, 9)); - mu_assert("rows fail", cmp_int_array(H->p, expected_Ap, 14)); + mu_assert("sparsity fail", + cmp_sparsity(trace_node->wsum_hess, expected_Ap, expected_Ai, 13, 9)); + mu_assert("vals fail", cmp_values(trace_node->wsum_hess, expected_Ax, 9)); free_expr(trace_node); return 0; @@ -135,11 +133,9 @@ const char *test_wsum_hess_trace_composite(void) int expected_Ap[14] = {0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9, 9, 9}; int expected_Ai[9] = {1, 2, 3, 4, 5, 6, 7, 8, 9}; - mu_assert("nnz wrong", trace_node->wsum_hess->nnz == 9); - mu_assert("rows fail", cmp_int_array(trace_node->wsum_hess->p, expected_Ap, 14)); - mu_assert("vals match", - cmp_double_array(trace_node->wsum_hess->x, expected_Ax, 9)); - mu_assert("cols match", cmp_int_array(trace_node->wsum_hess->i, expected_Ai, 9)); + mu_assert("sparsity fail", + cmp_sparsity(trace_node->wsum_hess, expected_Ap, expected_Ai, 13, 9)); + mu_assert("vals fail", cmp_values(trace_node->wsum_hess, expected_Ax, 9)); free_expr(trace_node); return 0; } diff --git 
a/tests/wsum_hess/affine/test_transpose.h b/tests/wsum_hess/affine/test_transpose.h index be91541..ad4e93f 100644 --- a/tests/wsum_hess/affine/test_transpose.h +++ b/tests/wsum_hess/affine/test_transpose.h @@ -27,12 +27,9 @@ const char *test_wsum_hess_transpose(void) int expected_p[9] = {0, 2, 4, 6, 8, 10, 12, 14, 16}; int expected_i[16] = {4, 6, 4, 6, 5, 7, 5, 7, 0, 1, 2, 3, 0, 1, 2, 3}; - mu_assert("hess values fail", - cmp_double_array(XYT->wsum_hess->x, expected_x, 8)); - mu_assert("jacobian row ptr fail", - cmp_int_array(XYT->wsum_hess->p, expected_p, 5)); - mu_assert("jacobian col idx fail", - cmp_int_array(XYT->wsum_hess->i, expected_i, 8)); + mu_assert("vals fail", cmp_values(XYT->wsum_hess, expected_x, 16)); + mu_assert("sparsity fail", + cmp_sparsity(XYT->wsum_hess, expected_p, expected_i, 8, 16)); free_expr(XYT); return 0; diff --git a/tests/wsum_hess/affine/test_upper_tri.h b/tests/wsum_hess/affine/test_upper_tri.h index 418b809..0cff5b6 100644 --- a/tests/wsum_hess/affine/test_upper_tri.h +++ b/tests/wsum_hess/affine/test_upper_tri.h @@ -49,12 +49,9 @@ const char *test_wsum_hess_upper_tri_log(void) int expected_p[17] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; int expected_i[16] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; - mu_assert("upper_tri log hess vals", - cmp_double_array(ut->wsum_hess->x, expected_x, 16)); - mu_assert("upper_tri log hess p", - cmp_int_array(ut->wsum_hess->p, expected_p, 17)); - mu_assert("upper_tri log hess i", - cmp_int_array(ut->wsum_hess->i, expected_i, 16)); + mu_assert("vals fail", cmp_values(ut->wsum_hess, expected_x, 16)); + mu_assert("sparsity fail", + cmp_sparsity(ut->wsum_hess, expected_p, expected_i, 16, 16)); free_expr(ut); return 0; diff --git a/tests/wsum_hess/affine/test_vector_mult.h b/tests/wsum_hess/affine/test_vector_mult.h index 1d75ad3..5d0d140 100644 --- a/tests/wsum_hess/affine/test_vector_mult.h +++ b/tests/wsum_hess/affine/test_vector_mult.h @@ -38,12 +38,9 @@ const char 
*test_wsum_hess_vector_mult_log_vector(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vector mult log hess: x values fail", - cmp_double_array(y->wsum_hess->x, expected_x, 3)); - mu_assert("vector mult log hess: row pointers fail", - cmp_int_array(y->wsum_hess->p, expected_p, 4)); - mu_assert("vector mult log hess: column indices fail", - cmp_int_array(y->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(y->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(y->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(y); return 0; @@ -76,12 +73,9 @@ const char *test_wsum_hess_vector_mult_log_matrix(void) int expected_p[5] = {0, 1, 2, 3, 4}; int expected_i[4] = {0, 1, 2, 3}; - mu_assert("vector mult log hess matrix: x values fail", - cmp_double_array(y->wsum_hess->x, expected_x, 4)); - mu_assert("vector mult log hess matrix: row pointers fail", - cmp_int_array(y->wsum_hess->p, expected_p, 5)); - mu_assert("vector mult log hess matrix: column indices fail", - cmp_int_array(y->wsum_hess->i, expected_i, 4)); + mu_assert("vals fail", cmp_values(y->wsum_hess, expected_x, 4)); + mu_assert("sparsity fail", + cmp_sparsity(y->wsum_hess, expected_p, expected_i, 4, 4)); free_expr(y); return 0; diff --git a/tests/wsum_hess/affine/test_vstack.h b/tests/wsum_hess/affine/test_vstack.h index f003431..cb473a1 100644 --- a/tests/wsum_hess/affine/test_vstack.h +++ b/tests/wsum_hess/affine/test_vstack.h @@ -39,12 +39,9 @@ const char *test_wsum_hess_vstack_vectors(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vstack hess vectors: vals", - cmp_double_array(stack->wsum_hess->x, expected_x, 3)); - mu_assert("vstack hess vectors: rows", - cmp_int_array(stack->wsum_hess->p, expected_p, 4)); - mu_assert("vstack hess vectors: cols", - cmp_int_array(stack->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(stack->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + 
cmp_sparsity(stack->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(stack); return 0; @@ -102,12 +99,9 @@ const char *test_wsum_hess_vstack_matrix(void) int expected_p[10] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; int expected_i[9] = {0, 1, 2, 3, 4, 5, 6, 7, 8}; - mu_assert("vstack hess matrix: vals", - cmp_double_array(stack->wsum_hess->x, expected_x, 9)); - mu_assert("vstack hess matrix: rows", - cmp_int_array(stack->wsum_hess->p, expected_p, 10)); - mu_assert("vstack hess matrix: cols", - cmp_int_array(stack->wsum_hess->i, expected_i, 9)); + mu_assert("vals fail", cmp_values(stack->wsum_hess, expected_x, 9)); + mu_assert("sparsity fail", + cmp_sparsity(stack->wsum_hess, expected_p, expected_i, 9, 9)); free_expr(stack); return 0; diff --git a/tests/wsum_hess/bivariate_full_dom/test_matmul.h b/tests/wsum_hess/bivariate_full_dom/test_matmul.h index 61a2d2d..9241609 100644 --- a/tests/wsum_hess/bivariate_full_dom/test_matmul.h +++ b/tests/wsum_hess/bivariate_full_dom/test_matmul.h @@ -49,16 +49,10 @@ const char *test_wsum_hess_matmul(void) Z->eval_wsum_hess(Z, w); /* Verify Hessian dimensions and sparsity */ - mu_assert("Hessian should be 18x18", Z->wsum_hess->m == n_vars); - mu_assert("Hessian should be 18x18", Z->wsum_hess->n == n_vars); - mu_assert("Hessian should have 48 nonzeros", Z->wsum_hess->nnz == 48); + mu_assert("Hessian should be 18 cols", Z->wsum_hess->n == n_vars); int expected_p[19] = {0, 4, 8, 12, 16, 20, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48}; - - mu_assert("Row pointers incorrect", - cmp_int_array(Z->wsum_hess->p, expected_p, 19)); - int expected_i[48] = {6, 9, 12, 15, /* row 0 */ 6, 9, 12, 15, /* row 1 */ 7, 10, 13, 16, /* row 2 */ @@ -78,8 +72,8 @@ const char *test_wsum_hess_matmul(void) 2, 3, /* row 16*/ 4, 5}; - mu_assert("Column indices incorrect", - cmp_int_array(Z->wsum_hess->i, expected_i, 48)); + mu_assert("sparsity fail", + cmp_sparsity(Z->wsum_hess, expected_p, expected_i, n_vars, 48)); double expected_x[48] = {1.0, 3.0, 5.0, 
7.0, /* row 0 */ 2.0, 4.0, 6.0, 8.0, /* row 1 */ @@ -100,8 +94,7 @@ const char *test_wsum_hess_matmul(void) 7.0, 8.0, /* row 16 */ 7.0, 8.0}; /* row 17 */ - mu_assert("Hessian values incorrect", - cmp_double_array(Z->wsum_hess->x, expected_x, 48)); + mu_assert("vals fail", cmp_values(Z->wsum_hess, expected_x, 48)); free_expr(Z); return 0; @@ -150,9 +143,7 @@ const char *test_wsum_hess_matmul_yx(void) Z->eval_wsum_hess(Z, w); /* Verify Hessian dimensions and sparsity */ - mu_assert("Hessian should be 18x18", Z->wsum_hess->m == n_vars); - mu_assert("Hessian should be 18x18", Z->wsum_hess->n == n_vars); - mu_assert("Hessian should have 48 nonzeros", Z->wsum_hess->nnz == 48); + mu_assert("Hessian should be 18 cols", Z->wsum_hess->n == n_vars); /* Row pointers when Y < X: * Rows 0-11 (Y variables): each couples with m=2 X variables @@ -161,9 +152,6 @@ const char *test_wsum_hess_matmul_yx(void) int expected_p[19] = {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 28, 32, 36, 40, 44, 48}; - mu_assert("Row pointers incorrect", - cmp_int_array(Z->wsum_hess->p, expected_p, 19)); - /* Column indices when Y < X: * Y[k_idx, col] couples with X[row, k_idx] for all row * X variable index = 12 + row + k_idx*m @@ -189,8 +177,8 @@ const char *test_wsum_hess_matmul_yx(void) 2, 5, 8, 11, /* row 16: X[0,2] */ 2, 5, 8, 11}; /* row 17: X[1,2] */ - mu_assert("Column indices incorrect", - cmp_int_array(Z->wsum_hess->i, expected_i, 48)); + mu_assert("sparsity fail", + cmp_sparsity(Z->wsum_hess, expected_p, expected_i, n_vars, 48)); double expected_x[48] = {1.0, 2.0, /* row 0 */ 1.0, 2.0, /* row 1 */ @@ -211,8 +199,7 @@ const char *test_wsum_hess_matmul_yx(void) 1.0, 3.0, 5.0, 7.0, /* row 16 */ 2.0, 4.0, 6.0, 8.0}; /* row 17 */ - mu_assert("Hessian values incorrect", - cmp_double_array(Z->wsum_hess->x, expected_x, 48)); + mu_assert("vals fail", cmp_values(Z->wsum_hess, expected_x, 48)); free_expr(Z); return 0; diff --git a/tests/wsum_hess/bivariate_full_dom/test_multiply.h 
b/tests/wsum_hess/bivariate_full_dom/test_multiply.h index e85c7a1..a7c01fc 100644 --- a/tests/wsum_hess/bivariate_full_dom/test_multiply.h +++ b/tests/wsum_hess/bivariate_full_dom/test_multiply.h @@ -30,9 +30,9 @@ const char *test_wsum_hess_multiply_1(void) int expected_i[6] = {8, 9, 10, 3, 4, 5}; double expected_x[6] = {1.0, 2.0, 3.0, 1.0, 2.0, 3.0}; - mu_assert("p array fails", cmp_int_array(node->wsum_hess->p, expected_p, 13)); - mu_assert("i array fails", cmp_int_array(node->wsum_hess->i, expected_i, 6)); - mu_assert("x array fails", cmp_double_array(node->wsum_hess->x, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(node->wsum_hess, expected_p, expected_i, 12, 6)); + mu_assert("vals fail", cmp_values(node->wsum_hess, expected_x, 6)); free_expr(node); return 0; @@ -41,14 +41,14 @@ const char *test_wsum_hess_multiply_1(void) const char *test_wsum_hess_multiply_sparse_random(void) { /* Test with larger random sparse matrices - * A: 5x10 CSR matrix - * B: 5x10 CSR matrix + * A: 5x10 CSR_matrix matrix + * B: 5x10 CSR_matrix matrix * x: 10-dimensional variable with var_id = 0, n_vars = 10 * Expected Hessian: 10x10 sparse matrix */ /* Create A matrix (5x10) */ - CSR_Matrix *A = new_csr_matrix(5, 10, 10); + CSR_matrix *A = new_CSR_matrix(5, 10, 10); double Ax[10] = {-1.44165273, -1.13687223, 0.55892257, 0.24912193, 0.84959744, -0.23998915, 0.5913356, -1.21627912, -0.50379166, 0.41531801}; int Ai[10] = {1, 2, 4, 8, 2, 3, 8, 9, 1, 2}; @@ -58,7 +58,7 @@ const char *test_wsum_hess_multiply_sparse_random(void) memcpy(A->p, Ap, 6 * sizeof(int)); /* Create B matrix (5x10) */ - CSR_Matrix *B = new_csr_matrix(5, 10, 10); + CSR_matrix *B = new_CSR_matrix(5, 10, 10); double Bx[10] = {1.27549062, 0.04194731, -0.4356034, 0.405574, 1.34670487, -0.57738638, 0.9411464, -0.31563179, 1.90831766, -0.89802958}; int Bi[10] = {0, 3, 5, 7, 0, 5, 0, 3, 7, 9}; @@ -85,7 +85,7 @@ const char *test_wsum_hess_multiply_sparse_random(void) double w[5] = {0.50646339, 0.44756224, 
0.67295241, 0.16424956, 0.03031469}; mult_node->eval_wsum_hess(mult_node, w); - /* Expected Hessian in CSR format (10x10) */ + /* Expected Hessian in CSR_matrix format (10x10) */ int expected_p[11] = {0, 6, 9, 13, 18, 19, 20, 20, 22, 25, 29}; int expected_i[29] = {1, 2, 3, 4, 8, 9, 0, 7, 9, 0, 3, 7, 9, 0, 2, 3, 8, 9, 0, 8, 1, 2, 0, 3, 5, 0, 1, 2, 3}; @@ -96,17 +96,14 @@ const char *test_wsum_hess_multiply_sparse_random(void) 0.361058, -0.09679721, -0.02914438, 0.02402617, 0.31718166, -0.03065625, -0.09679721, -0.18801593, 0.01371497, -0.01130641, 0.06305481}; - mu_assert("p array fails", - cmp_int_array(mult_node->wsum_hess->p, expected_p, 11)); - mu_assert("i array fails", - cmp_int_array(mult_node->wsum_hess->i, expected_i, 29)); - mu_assert("x array fails", - cmp_double_array(mult_node->wsum_hess->x, expected_x, 29)); + mu_assert("sparsity fail", + cmp_sparsity(mult_node->wsum_hess, expected_p, expected_i, 10, 29)); + mu_assert("vals fail", cmp_values(mult_node->wsum_hess, expected_x, 29)); /* Cleanup */ free_expr(mult_node); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } @@ -131,8 +128,8 @@ const char *test_wsum_hess_multiply_linear_ops(void) * [ 75. 18. 
76.]] */ - /* Create CSR matrix A */ - CSR_Matrix *A = new_csr_matrix(4, 3, 6); + /* Create CSR_matrix matrix A */ + CSR_matrix *A = new_CSR_matrix(4, 3, 6); double Ax[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; int Ai[6] = {0, 2, 1, 0, 2, 1}; int Ap[5] = {0, 2, 3, 5, 6}; @@ -140,8 +137,8 @@ const char *test_wsum_hess_multiply_linear_ops(void) memcpy(A->i, Ai, 6 * sizeof(int)); memcpy(A->p, Ap, 5 * sizeof(int)); - /* Create CSR matrix B */ - CSR_Matrix *B = new_csr_matrix(4, 3, 8); + /* Create CSR_matrix matrix B */ + CSR_matrix *B = new_CSR_matrix(4, 3, 8); double Bx[8] = {1.0, 4.0, 2.0, 7.0, 3.0, 2.0, 4.0, -1.0}; int Bi[8] = {0, 2, 1, 2, 0, 2, 1, 2}; int Bp[5] = {0, 2, 4, 6, 8}; @@ -170,7 +167,7 @@ const char *test_wsum_hess_multiply_linear_ops(void) mult_node->eval_wsum_hess(mult_node, w); /* Check sparsity pattern and values */ - /* Expected CSR format: + /* Expected CSR_matrix format: * indptr: [0, 2, 4, 7] * indices: [0, 2, 1, 2, 0, 1, 2] * data: [74.0, 75.0, 216.0, 18.0, 75.0, 18.0, 76.0] @@ -179,17 +176,14 @@ const char *test_wsum_hess_multiply_linear_ops(void) int expected_i[7] = {0, 2, 1, 2, 0, 1, 2}; double expected_x[7] = {74.0, 75.0, 216.0, 18.0, 75.0, 18.0, 76.0}; - mu_assert("p array fails", - cmp_int_array(mult_node->wsum_hess->p, expected_p, 4)); - mu_assert("i array fails", - cmp_int_array(mult_node->wsum_hess->i, expected_i, 7)); - mu_assert("x array fails", - cmp_double_array(mult_node->wsum_hess->x, expected_x, 7)); + mu_assert("sparsity fail", + cmp_sparsity(mult_node->wsum_hess, expected_p, expected_i, 3, 7)); + mu_assert("vals fail", cmp_values(mult_node->wsum_hess, expected_x, 7)); /* Cleanup */ free_expr(mult_node); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } @@ -219,9 +213,9 @@ const char *test_wsum_hess_multiply_2(void) int expected_i[6] = {8, 9, 10, 3, 4, 5}; double expected_x[6] = {1.0, 2.0, 3.0, 1.0, 2.0, 3.0}; - mu_assert("p array fails", cmp_int_array(node->wsum_hess->p, expected_p, 
13)); - mu_assert("i array fails", cmp_int_array(node->wsum_hess->i, expected_i, 6)); - mu_assert("x array fails", cmp_double_array(node->wsum_hess->x, expected_x, 6)); + mu_assert("sparsity fail", + cmp_sparsity(node->wsum_hess, expected_p, expected_i, 12, 6)); + mu_assert("vals fail", cmp_values(node->wsum_hess, expected_x, 6)); free_expr(node); return 0; diff --git a/tests/wsum_hess/bivariate_restricted_dom/test_quad_over_lin.h b/tests/wsum_hess/bivariate_restricted_dom/test_quad_over_lin.h index f1053e2..2cb5dc4 100644 --- a/tests/wsum_hess/bivariate_restricted_dom/test_quad_over_lin.h +++ b/tests/wsum_hess/bivariate_restricted_dom/test_quad_over_lin.h @@ -26,9 +26,9 @@ const char *test_wsum_hess_quad_over_lin_xy(void) double expected_x[10] = {1.0, -0.25, 1.0, -0.5, 1.0, -0.75, -0.25, -0.5, -0.75, 0.875}; - mu_assert("p array fails", cmp_int_array(node->wsum_hess->p, expected_p, 10)); - mu_assert("i array fails", cmp_int_array(node->wsum_hess->i, expected_i, 10)); - mu_assert("x array fails", cmp_double_array(node->wsum_hess->x, expected_x, 10)); + mu_assert("sparsity fail", + cmp_sparsity(node->wsum_hess, expected_p, expected_i, 9, 10)); + mu_assert("vals fail", cmp_values(node->wsum_hess, expected_x, 10)); free_expr(node); return 0; @@ -56,9 +56,9 @@ const char *test_wsum_hess_quad_over_lin_yx(void) double expected_x[10] = {0.875, -0.25, -0.5, -0.75, -0.25, 1.0, -0.5, 1.0, -0.75, 1.0}; - mu_assert("p array fails", cmp_int_array(node->wsum_hess->p, expected_p, 10)); - mu_assert("i array fails", cmp_int_array(node->wsum_hess->i, expected_i, 10)); - mu_assert("x array fails", cmp_double_array(node->wsum_hess->x, expected_x, 10)); + mu_assert("sparsity fail", + cmp_sparsity(node->wsum_hess, expected_p, expected_i, 9, 10)); + mu_assert("vals fail", cmp_values(node->wsum_hess, expected_x, 10)); free_expr(node); return 0; diff --git a/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr.h b/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr.h index 
ce2137a..5c3adde 100644 --- a/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr.h +++ b/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr.h @@ -30,9 +30,9 @@ const char *test_wsum_hess_rel_entr_1(void) double expected_x[12] = {1.0, -0.25, 1.0, -0.4, 1.0, -0.5, -0.25, 0.0625, -0.4, 0.16, -0.5, 0.25}; - mu_assert("p array fails", cmp_int_array(node->wsum_hess->p, expected_p, 11)); - mu_assert("i array fails", cmp_int_array(node->wsum_hess->i, expected_i, 12)); - mu_assert("x array fails", cmp_double_array(node->wsum_hess->x, expected_x, 12)); + mu_assert("sparsity fail", + cmp_sparsity(node->wsum_hess, expected_p, expected_i, 10, 12)); + mu_assert("vals fail", cmp_values(node->wsum_hess, expected_x, 12)); free_expr(node); return 0; @@ -62,9 +62,9 @@ const char *test_wsum_hess_rel_entr_2(void) double expected_x[12] = {0.0625, -0.25, 0.16, -0.4, 0.25, -0.5, -0.25, 1.0, -0.4, 1.0, -0.5, 1.0}; - mu_assert("p array fails", cmp_int_array(node->wsum_hess->p, expected_p, 11)); - mu_assert("i array fails", cmp_int_array(node->wsum_hess->i, expected_i, 12)); - mu_assert("x array fails", cmp_double_array(node->wsum_hess->x, expected_x, 12)); + mu_assert("sparsity fail", + cmp_sparsity(node->wsum_hess, expected_p, expected_i, 10, 12)); + mu_assert("vals fail", cmp_values(node->wsum_hess, expected_x, 12)); free_expr(node); return 0; @@ -98,9 +98,9 @@ const char *test_wsum_hess_rel_entr_matrix(void) -1.0 / 6.0, 1.0 / 36.0, -0.4, 0.16, -0.75, 0.5625, -4.0 / 3.0, 1.7777777777777777, -2.5, 6.25, -6.0, 36.0}; - mu_assert("p array fails", cmp_int_array(node->wsum_hess->p, expected_p, 13)); - mu_assert("i array fails", cmp_int_array(node->wsum_hess->i, expected_i, 24)); - mu_assert("x array fails", cmp_double_array(node->wsum_hess->x, expected_x, 24)); + mu_assert("sparsity fail", + cmp_sparsity(node->wsum_hess, expected_p, expected_i, 12, 24)); + mu_assert("vals fail", cmp_values(node->wsum_hess, expected_x, 24)); free_expr(node); return 0; diff --git 
a/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr_scalar_vector.h b/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr_scalar_vector.h index a431b96..74ba8b7 100644 --- a/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr_scalar_vector.h +++ b/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr_scalar_vector.h @@ -25,9 +25,9 @@ const char *test_wsum_hess_rel_entr_scalar_vector(void) double expected_x[10] = {15.0, -2.0, -5.0 / 3.0, -1.5, -2.0, 1.0, -5.0 / 3.0, 5.0 / 9.0, -1.5, 0.375}; - mu_assert("p array fails", cmp_int_array(node->wsum_hess->p, expected_p, 5)); - mu_assert("i array fails", cmp_int_array(node->wsum_hess->i, expected_i, 10)); - mu_assert("x array fails", cmp_double_array(node->wsum_hess->x, expected_x, 10)); + mu_assert("sparsity fail", + cmp_sparsity(node->wsum_hess, expected_p, expected_i, 4, 10)); + mu_assert("vals fail", cmp_values(node->wsum_hess, expected_x, 10)); free_expr(node); return 0; diff --git a/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr_vector_scalar.h b/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr_vector_scalar.h index 60b9436..d74e92b 100644 --- a/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr_vector_scalar.h +++ b/tests/wsum_hess/bivariate_restricted_dom/test_rel_entr_vector_scalar.h @@ -25,9 +25,9 @@ const char *test_wsum_hess_rel_entr_vector_scalar(void) double expected_x[10] = {1.0, -0.25, 1.0, -0.5, 1.0, -0.75, -0.25, -0.5, -0.75, 0.875}; - mu_assert("p array fails", cmp_int_array(node->wsum_hess->p, expected_p, 5)); - mu_assert("i array fails", cmp_int_array(node->wsum_hess->i, expected_i, 10)); - mu_assert("x array fails", cmp_double_array(node->wsum_hess->x, expected_x, 10)); + mu_assert("sparsity fail", + cmp_sparsity(node->wsum_hess, expected_p, expected_i, 4, 10)); + mu_assert("vals fail", cmp_values(node->wsum_hess, expected_x, 10)); free_expr(node); return 0; diff --git a/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h 
b/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h index c5c1a60..6fbff2d 100644 --- a/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h +++ b/tests/wsum_hess/composite/test_chain_rule_wsum_hess.h @@ -123,8 +123,8 @@ const char *test_wsum_hess_Ax_Bx_multiply(void) double u_vals[4] = {1.0, 2.0, 3.0, 4.0}; double w[2] = {1.33, 2.1}; - CSR_Matrix *A = new_csr_random(2, 2, 1.0); - CSR_Matrix *B = new_csr_random(2, 2, 1.0); + CSR_matrix *A = new_csr_random(2, 2, 1.0); + CSR_matrix *B = new_csr_random(2, 2, 1.0); expr *x = new_variable(2, 1, 1, 4); expr *Ax = new_left_matmul(NULL, x, A); expr *Bx = new_left_matmul(NULL, x, B); @@ -134,8 +134,8 @@ const char *test_wsum_hess_Ax_Bx_multiply(void) check_wsum_hess(multiply, u_vals, w, NUMERICAL_DIFF_DEFAULT_H)); free_expr(multiply); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } @@ -159,8 +159,8 @@ const char *test_wsum_hess_AX_BX_multiply(void) double u_vals[4] = {1.0, 2.0, 3.0, 4.0}; double w[4] = {1.1, 2.2, 3.3, 4.4}; - CSR_Matrix *A = new_csr_random(2, 2, 1.0); - CSR_Matrix *B = new_csr_random(2, 2, 1.0); + CSR_matrix *A = new_csr_random(2, 2, 1.0); + CSR_matrix *B = new_csr_random(2, 2, 1.0); expr *X = new_variable(2, 2, 0, 4); expr *AX = new_left_matmul(NULL, X, A); expr *BX = new_left_matmul(NULL, X, B); @@ -170,8 +170,8 @@ const char *test_wsum_hess_AX_BX_multiply(void) check_wsum_hess(multiply, u_vals, w, NUMERICAL_DIFF_DEFAULT_H)); free_expr(multiply); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } @@ -180,8 +180,8 @@ const char *test_wsum_hess_multiply_deep_composite(void) double u_vals[4] = {1.0, 2.0, 3.0, 4.0}; double w[4] = {1.1, 2.2, 3.3, 4.4}; - CSR_Matrix *A = new_csr_random(2, 2, 1.0); - CSR_Matrix *B = new_csr_random(2, 2, 1.0); + CSR_matrix *A = new_csr_random(2, 2, 1.0); + CSR_matrix *B = new_csr_random(2, 2, 1.0); expr *X = new_variable(2, 2, 0, 8); expr *Y = new_variable(2, 2, 0, 8); expr 
*AX = new_left_matmul(NULL, X, A); @@ -195,8 +195,8 @@ const char *test_wsum_hess_multiply_deep_composite(void) check_wsum_hess(multiply, u_vals, w, NUMERICAL_DIFF_DEFAULT_H)); free_expr(multiply); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } @@ -205,10 +205,10 @@ const char *test_wsum_hess_quad_form_Ax(void) double u_vals[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; double w = 1.0; - CSR_Matrix *A = new_csr_random(3, 4, 1.0); + CSR_matrix *A = new_csr_random(3, 4, 1.0); /* Q = [1 2 0; 2 3 0; 0 0 4] (symmetric) */ - CSR_Matrix *Q = new_csr_matrix(3, 3, 5); + CSR_matrix *Q = new_CSR_matrix(3, 3, 5); double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; int Qi[5] = {0, 1, 0, 1, 2}; int Qp[4] = {0, 2, 4, 5}; @@ -224,8 +224,8 @@ const char *test_wsum_hess_quad_form_Ax(void) check_wsum_hess(node, u_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); free_expr(node); - free_csr_matrix(A); - free_csr_matrix(Q); + free_CSR_matrix(A); + free_CSR_matrix(Q); return 0; } @@ -234,10 +234,10 @@ const char *test_wsum_hess_quad_form_sin_Ax(void) double u_vals[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; double w = 2.0; - CSR_Matrix *A = new_csr_random(3, 4, 1.0); + CSR_matrix *A = new_csr_random(3, 4, 1.0); /* Q = [1 2 0; 2 3 0; 0 0 4] (symmetric) */ - CSR_Matrix *Q = new_csr_matrix(3, 3, 5); + CSR_matrix *Q = new_CSR_matrix(3, 3, 5); double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; int Qi[5] = {0, 1, 0, 1, 2}; int Qp[4] = {0, 2, 4, 5}; @@ -254,8 +254,8 @@ const char *test_wsum_hess_quad_form_sin_Ax(void) check_wsum_hess(node, u_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); free_expr(node); - free_csr_matrix(A); - free_csr_matrix(Q); + free_CSR_matrix(A); + free_CSR_matrix(Q); return 0; } @@ -303,8 +303,8 @@ const char *test_wsum_hess_matmul_Ax_By(void) double u_vals[10] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0}; double w[6] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}; - CSR_Matrix *A = new_csr_random(3, 2, 1.0); - CSR_Matrix *B = new_csr_random(2, 3, 1.0); + CSR_matrix *A = 
new_csr_random(3, 2, 1.0); + CSR_matrix *B = new_csr_random(2, 3, 1.0); expr *X = new_variable(2, 2, 0, 10); expr *Y = new_variable(3, 2, 4, 10); @@ -316,8 +316,8 @@ const char *test_wsum_hess_matmul_Ax_By(void) check_wsum_hess(Z, u_vals, w, NUMERICAL_DIFF_DEFAULT_H)); free_expr(Z); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } @@ -327,8 +327,8 @@ const char *test_wsum_hess_matmul_sin_Ax_cos_Bx(void) double u_vals[6] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0}; double w[4] = {1.0, 2.0, 3.0, 4.0}; - CSR_Matrix *A = new_csr_random(2, 3, 1.0); - CSR_Matrix *B = new_csr_random(2, 3, 1.0); + CSR_matrix *A = new_csr_random(2, 3, 1.0); + CSR_matrix *B = new_csr_random(2, 3, 1.0); expr *X = new_variable(3, 2, 0, 6); expr *AX = new_left_matmul(NULL, X, A); /* 2x2 */ @@ -341,8 +341,8 @@ const char *test_wsum_hess_matmul_sin_Ax_cos_Bx(void) check_wsum_hess(Z, u_vals, w, NUMERICAL_DIFF_DEFAULT_H)); free_expr(Z); - free_csr_matrix(A); - free_csr_matrix(B); + free_CSR_matrix(A); + free_CSR_matrix(B); return 0; } @@ -368,7 +368,7 @@ const char *test_wsum_hess_quad_form_exp(void) double w = 3.0; /* Q = [1 2 0; 2 3 0; 0 0 4] (symmetric) */ - CSR_Matrix *Q = new_csr_matrix(3, 3, 5); + CSR_matrix *Q = new_CSR_matrix(3, 3, 5); double Qx[5] = {1.0, 2.0, 2.0, 3.0, 4.0}; int Qi[5] = {0, 1, 0, 1, 2}; int Qp[4] = {0, 2, 4, 5}; @@ -384,6 +384,6 @@ const char *test_wsum_hess_quad_form_exp(void) check_wsum_hess(node, u_vals, &w, NUMERICAL_DIFF_DEFAULT_H)); free_expr(node); - free_csr_matrix(Q); + free_CSR_matrix(Q); return 0; } diff --git a/tests/wsum_hess/elementwise_full_dom/test_exp.h b/tests/wsum_hess/elementwise_full_dom/test_exp.h index be00553..891eae0 100644 --- a/tests/wsum_hess/elementwise_full_dom/test_exp.h +++ b/tests/wsum_hess/elementwise_full_dom/test_exp.h @@ -27,12 +27,9 @@ const char *test_wsum_hess_exp(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vals incorrect", - 
cmp_double_array(exp_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(exp_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - cmp_int_array(exp_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(exp_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(exp_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(exp_node); diff --git a/tests/wsum_hess/elementwise_full_dom/test_hyperbolic.h b/tests/wsum_hess/elementwise_full_dom/test_hyperbolic.h index 88a655f..cf543a7 100644 --- a/tests/wsum_hess/elementwise_full_dom/test_hyperbolic.h +++ b/tests/wsum_hess/elementwise_full_dom/test_hyperbolic.h @@ -34,12 +34,9 @@ const char *test_wsum_hess_sinh(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vals incorrect", - cmp_double_array(sinh_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(sinh_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - cmp_int_array(sinh_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(sinh_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(sinh_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(sinh_node); @@ -72,12 +69,9 @@ const char *test_wsum_hess_tanh(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vals incorrect", - cmp_double_array(tanh_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(tanh_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - cmp_int_array(tanh_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(tanh_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(tanh_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(tanh_node); @@ -110,12 +104,9 @@ const char *test_wsum_hess_asinh(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - 
mu_assert("vals incorrect", - cmp_double_array(asinh_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(asinh_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - cmp_int_array(asinh_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(asinh_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(asinh_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(asinh_node); @@ -149,12 +140,9 @@ const char *test_wsum_hess_atanh(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vals incorrect", - cmp_double_array(atanh_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(atanh_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - cmp_int_array(atanh_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(atanh_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(atanh_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(atanh_node); diff --git a/tests/wsum_hess/elementwise_full_dom/test_logistic.h b/tests/wsum_hess/elementwise_full_dom/test_logistic.h index 2de6070..025ec8c 100644 --- a/tests/wsum_hess/elementwise_full_dom/test_logistic.h +++ b/tests/wsum_hess/elementwise_full_dom/test_logistic.h @@ -42,12 +42,9 @@ const char *test_wsum_hess_logistic(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vals incorrect", - cmp_double_array(logistic_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(logistic_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - cmp_int_array(logistic_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(logistic_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(logistic_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(logistic_node); diff --git 
a/tests/wsum_hess/elementwise_full_dom/test_power.h b/tests/wsum_hess/elementwise_full_dom/test_power.h index c21021e..1456bef 100644 --- a/tests/wsum_hess/elementwise_full_dom/test_power.h +++ b/tests/wsum_hess/elementwise_full_dom/test_power.h @@ -27,12 +27,9 @@ const char *test_wsum_hess_power(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vals incorrect", - cmp_double_array(power_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(power_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - cmp_int_array(power_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(power_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(power_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(power_node); diff --git a/tests/wsum_hess/elementwise_full_dom/test_trig.h b/tests/wsum_hess/elementwise_full_dom/test_trig.h index 51b1422..550c272 100644 --- a/tests/wsum_hess/elementwise_full_dom/test_trig.h +++ b/tests/wsum_hess/elementwise_full_dom/test_trig.h @@ -27,12 +27,9 @@ const char *test_wsum_hess_sin(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vals incorrect", - cmp_double_array(sin_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(sin_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - cmp_int_array(sin_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(sin_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(sin_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(sin_node); @@ -56,12 +53,9 @@ const char *test_wsum_hess_cos(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vals incorrect", - cmp_double_array(cos_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(cos_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - 
cmp_int_array(cos_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(cos_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(cos_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(cos_node); @@ -87,12 +81,9 @@ const char *test_wsum_hess_tan(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vals incorrect", - cmp_double_array(tan_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(tan_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - cmp_int_array(tan_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(tan_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(tan_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(tan_node); diff --git a/tests/wsum_hess/elementwise_full_dom/test_xexp.h b/tests/wsum_hess/elementwise_full_dom/test_xexp.h index c4c6f94..55e80ef 100644 --- a/tests/wsum_hess/elementwise_full_dom/test_xexp.h +++ b/tests/wsum_hess/elementwise_full_dom/test_xexp.h @@ -28,12 +28,9 @@ const char *test_wsum_hess_xexp(void) int expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vals incorrect", - cmp_double_array(xexp_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(xexp_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - cmp_int_array(xexp_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(xexp_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(xexp_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(xexp_node); diff --git a/tests/wsum_hess/elementwise_restricted_dom/test_entr.h b/tests/wsum_hess/elementwise_restricted_dom/test_entr.h index 1f4fb14..f7c83ca 100644 --- a/tests/wsum_hess/elementwise_restricted_dom/test_entr.h +++ b/tests/wsum_hess/elementwise_restricted_dom/test_entr.h @@ -27,12 +27,9 @@ const char *test_wsum_hess_entr(void) int 
expected_p[4] = {0, 1, 2, 3}; int expected_i[3] = {0, 1, 2}; - mu_assert("vals incorrect", - cmp_double_array(entr_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(entr_node->wsum_hess->p, expected_p, 4)); - mu_assert("cols incorrect", - cmp_int_array(entr_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(entr_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(entr_node->wsum_hess, expected_p, expected_i, 3, 3)); free_expr(entr_node); diff --git a/tests/wsum_hess/elementwise_restricted_dom/test_log.h b/tests/wsum_hess/elementwise_restricted_dom/test_log.h index 024ea35..c8acff5 100644 --- a/tests/wsum_hess/elementwise_restricted_dom/test_log.h +++ b/tests/wsum_hess/elementwise_restricted_dom/test_log.h @@ -39,12 +39,9 @@ const char *test_wsum_hess_log(void) int expected_p[8] = {0, 0, 0, 1, 2, 3, 3, 3}; int expected_i[3] = {2, 3, 4}; - mu_assert("vals incorrect", - cmp_double_array(log_node->wsum_hess->x, expected_x, 3)); - mu_assert("rows incorrect", - cmp_int_array(log_node->wsum_hess->p, expected_p, 8)); - mu_assert("cols incorrect", - cmp_int_array(log_node->wsum_hess->i, expected_i, 3)); + mu_assert("vals fail", cmp_values(log_node->wsum_hess, expected_x, 3)); + mu_assert("sparsity fail", + cmp_sparsity(log_node->wsum_hess, expected_p, expected_i, 7, 3)); free_expr(log_node); @@ -58,7 +55,7 @@ const char *test_wsum_hess_exp_composite(void) double Ax[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}; int Ai[] = {0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4}; int Ap[] = {0, 5, 10, 15}; - CSR_Matrix *A_csr = new_csr_matrix(3, 5, 15); + CSR_matrix *A_csr = new_CSR_matrix(3, 5, 15); memcpy(A_csr->x, Ax, 15 * sizeof(double)); memcpy(A_csr->i, Ai, 15 * sizeof(int)); memcpy(A_csr->p, Ap, 4 * sizeof(int)); @@ -70,7 +67,7 @@ const char *test_wsum_hess_exp_composite(void) mu_assert("check_wsum_hess failed", check_wsum_hess(exp_node, u_vals, w, NUMERICAL_DIFF_DEFAULT_H)); - 
free_csr_matrix(A_csr); + free_CSR_matrix(A_csr); free_expr(exp_node); return 0; diff --git a/tests/wsum_hess/other/test_prod.h b/tests/wsum_hess/other/test_prod.h index 753abcf..28184c1 100644 --- a/tests/wsum_hess/other/test_prod.h +++ b/tests/wsum_hess/other/test_prod.h @@ -28,9 +28,9 @@ const char *test_wsum_hess_prod_no_zero(void) int expected_p[9] = {0, 0, 0, 4, 8, 12, 16, 16, 16}; int expected_i[16] = {2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5}; - mu_assert("vals fail", cmp_double_array(p->wsum_hess->x, expected_x, 16)); - mu_assert("rows fail", cmp_int_array(p->wsum_hess->p, expected_p, 9)); - mu_assert("cols fail", cmp_int_array(p->wsum_hess->i, expected_i, 16)); + mu_assert("vals fail", cmp_values(p->wsum_hess, expected_x, 16)); + mu_assert("sparsity fail", + cmp_sparsity(p->wsum_hess, expected_p, expected_i, 8, 16)); free_expr(p); return 0; @@ -62,9 +62,9 @@ const char *test_wsum_hess_prod_one_zero(void) int expected_p[9] = {0, 0, 0, 4, 8, 12, 16, 16, 16}; int expected_i[16] = {2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5}; - mu_assert("vals fail", cmp_double_array(p->wsum_hess->x, expected_x, 16)); - mu_assert("rows fail", cmp_int_array(p->wsum_hess->p, expected_p, 9)); - mu_assert("cols fail", cmp_int_array(p->wsum_hess->i, expected_i, 16)); + mu_assert("vals fail", cmp_values(p->wsum_hess, expected_x, 16)); + mu_assert("sparsity fail", + cmp_sparsity(p->wsum_hess, expected_p, expected_i, 8, 16)); free_expr(p); return 0; @@ -91,9 +91,9 @@ const char *test_wsum_hess_prod_two_zeros(void) int expected_p[9] = {0, 0, 0, 4, 8, 12, 16, 16, 16}; int expected_i[16] = {2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5}; - mu_assert("vals fail", cmp_double_array(p->wsum_hess->x, expected_x, 16)); - mu_assert("rows fail", cmp_int_array(p->wsum_hess->p, expected_p, 9)); - mu_assert("cols fail", cmp_int_array(p->wsum_hess->i, expected_i, 16)); + mu_assert("vals fail", cmp_values(p->wsum_hess, expected_x, 16)); + mu_assert("sparsity fail", + 
cmp_sparsity(p->wsum_hess, expected_p, expected_i, 8, 16)); free_expr(p); return 0; @@ -118,9 +118,9 @@ const char *test_wsum_hess_prod_many_zeros(void) int expected_p[9] = {0, 0, 0, 4, 8, 12, 16, 16, 16}; int expected_i[16] = {2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5, 2, 3, 4, 5}; - mu_assert("vals fail", cmp_double_array(p->wsum_hess->x, expected_x, 16)); - mu_assert("rows fail", cmp_int_array(p->wsum_hess->p, expected_p, 9)); - mu_assert("cols fail", cmp_int_array(p->wsum_hess->i, expected_i, 16)); + mu_assert("vals fail", cmp_values(p->wsum_hess, expected_x, 16)); + mu_assert("sparsity fail", + cmp_sparsity(p->wsum_hess, expected_p, expected_i, 8, 16)); free_expr(p); return 0; diff --git a/tests/wsum_hess/other/test_prod_axis_one.h b/tests/wsum_hess/other/test_prod_axis_one.h index e6915a0..21b810d 100644 --- a/tests/wsum_hess/other/test_prod_axis_one.h +++ b/tests/wsum_hess/other/test_prod_axis_one.h @@ -47,7 +47,7 @@ const char *test_wsum_hess_prod_axis_one_no_zeros(void) /* Var 6 (row 1, col 2): [8, 4] (excludes col 2) */ 8.0, 4.0}; - /* Row pointers (monotonically increasing for valid CSR format) */ + /* Row pointers (monotonically increasing for valid CSR_matrix format) */ int expected_p[9] = {0, 0, 2, 4, 6, 8, 10, 12, 12}; /* Column indices (each row of the matrix interacts with its own columns, @@ -65,9 +65,9 @@ const char *test_wsum_hess_prod_axis_one_no_zeros(void) /* Var 6 (row 1, col 2): cols 2,4 (excludes 6) */ 2, 4}; - mu_assert("vals fail", cmp_double_array(p->wsum_hess->x, expected_x, 12)); - mu_assert("rows fail", cmp_int_array(p->wsum_hess->p, expected_p, 9)); - mu_assert("cols fail", cmp_int_array(p->wsum_hess->i, expected_i, 12)); + mu_assert("vals fail", cmp_values(p->wsum_hess, expected_x, 12)); + mu_assert("sparsity fail", + cmp_sparsity(p->wsum_hess, expected_p, expected_i, 8, 12)); free_expr(p); return 0; @@ -141,7 +141,7 @@ const char *test_wsum_hess_prod_axis_one_one_zero(void) expected_x[16] = 18.0; expected_x[17] = 9.0; - /* Row pointers 
(monotonically increasing for valid CSR format) */ + /* Row pointers (monotonically increasing for valid CSR_matrix format) */ int expected_p[11] = {0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18}; /* Column indices (each row of the matrix interacts with its own columns, @@ -165,9 +165,9 @@ const char *test_wsum_hess_prod_axis_one_one_zero(void) /* Var 9 (row 2, col 2): cols 3,6 (excludes 9) */ 3, 6}; - mu_assert("vals fail", cmp_double_array(p->wsum_hess->x, expected_x, 18)); - mu_assert("rows fail", cmp_int_array(p->wsum_hess->p, expected_p, 11)); - mu_assert("cols fail", cmp_int_array(p->wsum_hess->i, expected_i, 18)); + mu_assert("vals fail", cmp_values(p->wsum_hess, expected_x, 18)); + mu_assert("sparsity fail", + cmp_sparsity(p->wsum_hess, expected_p, expected_i, 10, 18)); free_expr(p); return 0; @@ -210,7 +210,7 @@ const char *test_wsum_hess_prod_axis_one_mixed_zeros(void) memset(expected_x, 0, sizeof(expected_x)); /* For a 5x3 matrix with var_id=1, each row has 2 nnz (d2-1): - * CSR row pointers: p[i] = (i-1)*2 for i in [1,15] + * CSR_matrix row pointers: p[i] = (i-1)*2 for i in [1,15] * Var 1 (matrix [0,0]): p[1]=0 * Var 2 (matrix [1,0]): p[2]=2 * Var 3 (matrix [2,0]): p[3]=4 @@ -293,7 +293,7 @@ const char *test_wsum_hess_prod_axis_one_mixed_zeros(void) expected_x[28] = 25.0; /* 75/(3*1) */ expected_x[29] = 5.0; /* 75/(3*5) */ - /* Row pointers (monotonically increasing for valid CSR format) */ + /* Row pointers (monotonically increasing for valid CSR_matrix format) */ int expected_p[17] = {0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30}; @@ -317,9 +317,9 @@ const char *test_wsum_hess_prod_axis_one_mixed_zeros(void) } } - mu_assert("vals fail", cmp_double_array(p->wsum_hess->x, expected_x, 30)); - mu_assert("rows fail", cmp_int_array(p->wsum_hess->p, expected_p, 17)); - mu_assert("cols fail", cmp_int_array(p->wsum_hess->i, expected_i, 30)); + mu_assert("vals fail", cmp_values(p->wsum_hess, expected_x, 30)); + mu_assert("sparsity fail", + 
cmp_sparsity(p->wsum_hess, expected_p, expected_i, 16, 30)); free_expr(p); return 0; @@ -373,9 +373,9 @@ const char *test_wsum_hess_prod_axis_one_2x2(void) 0, /* Var 2 (row 0, col 1): only col 0 */ 1}; /* Var 3 (row 1, col 1): only col 0 */ - mu_assert("vals fail", cmp_double_array(p->wsum_hess->x, expected_x, 4)); - mu_assert("rows fail", cmp_int_array(p->wsum_hess->p, expected_p, 5)); - mu_assert("cols fail", cmp_int_array(p->wsum_hess->i, expected_i, 4)); + mu_assert("vals fail", cmp_values(p->wsum_hess, expected_x, 4)); + mu_assert("sparsity fail", + cmp_sparsity(p->wsum_hess, expected_p, expected_i, 4, 4)); free_expr(p); return 0; diff --git a/tests/wsum_hess/other/test_prod_axis_zero.h b/tests/wsum_hess/other/test_prod_axis_zero.h index 6a54e0d..e057e47 100644 --- a/tests/wsum_hess/other/test_prod_axis_zero.h +++ b/tests/wsum_hess/other/test_prod_axis_zero.h @@ -63,9 +63,9 @@ const char *test_wsum_hess_prod_axis_zero_no_zeros(void) */ int expected_i[12] = {1, 2, 1, 2, 3, 4, 3, 4, 5, 6, 5, 6}; - mu_assert("vals fail", cmp_double_array(p->wsum_hess->x, expected_x, 12)); - mu_assert("rows fail", cmp_int_array(p->wsum_hess->p, expected_p, 9)); - mu_assert("cols fail", cmp_int_array(p->wsum_hess->i, expected_i, 12)); + mu_assert("vals fail", cmp_values(p->wsum_hess, expected_x, 12)); + mu_assert("sparsity fail", + cmp_sparsity(p->wsum_hess, expected_p, expected_i, 8, 12)); free_expr(p); return 0; @@ -75,7 +75,7 @@ const char *test_wsum_hess_prod_axis_zero_mixed_zeros(void) { /* x is 5x3 variable, global index 1, total 16 vars * x = [1, 1, 1, 1, 1, 2, 0, 3, 4, 5, 1, 0, 0, 2, 3] (column-major) - * Matrix (column-major): + * matrix (column-major): * [1, 2, 1] * [1, 0, 0] * [1, 3, 0] @@ -183,9 +183,9 @@ const char *test_wsum_hess_prod_axis_zero_mixed_zeros(void) } } - mu_assert("rows fail", cmp_int_array(p->wsum_hess->p, expected_p, 17)); - mu_assert("cols fail", cmp_int_array(p->wsum_hess->i, expected_i, 75)); - mu_assert("vals fail", 
cmp_double_array(p->wsum_hess->x, expected_x, 75)); + mu_assert("sparsity fail", + cmp_sparsity(p->wsum_hess, expected_p, expected_i, 16, 75)); + mu_assert("vals fail", cmp_values(p->wsum_hess, expected_x, 75)); free_expr(p); return 0; @@ -196,7 +196,7 @@ const char *test_wsum_hess_prod_axis_zero_one_zero(void) /* Test with a column that has exactly 1 zero * x is 2x2 variable, global index 1, total 5 vars * x = [1.0, 1.0, 2.0, 0.0] (column-major) - * Matrix (column-major): + * matrix (column-major): * [1, 2] * [1, 0] * @@ -245,9 +245,9 @@ const char *test_wsum_hess_prod_axis_zero_one_zero(void) */ int expected_i[8] = {1, 2, 1, 2, 3, 4, 3, 4}; - mu_assert("vals fail", cmp_double_array(p->wsum_hess->x, expected_x, 8)); - mu_assert("rows fail", cmp_int_array(p->wsum_hess->p, expected_p, 6)); - mu_assert("cols fail", cmp_int_array(p->wsum_hess->i, expected_i, 8)); + mu_assert("vals fail", cmp_values(p->wsum_hess, expected_x, 8)); + mu_assert("sparsity fail", + cmp_sparsity(p->wsum_hess, expected_p, expected_i, 5, 8)); free_expr(p); return 0; diff --git a/tests/wsum_hess/other/test_quad_form.h b/tests/wsum_hess/other/test_quad_form.h index 73f8a1d..bd62d5a 100644 --- a/tests/wsum_hess/other/test_quad_form.h +++ b/tests/wsum_hess/other/test_quad_form.h @@ -17,7 +17,7 @@ const char *test_wsum_hess_quad_form(void) * [0 3 4 1] * [0 0 1 6] */ - CSR_Matrix *Q = new_csr_matrix(4, 4, 10); + CSR_matrix *Q = new_CSR_matrix(4, 4, 10); double Qx[10] = {1.0, 2.0, 2.0, 5.0, 3.0, 3.0, 4.0, 1.0, 1.0, 6.0}; int Qi[10] = {0, 1, 0, 1, 2, 1, 2, 3, 2, 3}; int Qp[5] = {0, 2, 5, 8, 10}; @@ -38,11 +38,11 @@ const char *test_wsum_hess_quad_form(void) int expected_i[10] = {3, 4, 3, 4, 5, 4, 5, 6, 5, 6}; double expected_x[10] = {4.0, 8.0, 8.0, 20.0, 12.0, 12.0, 16.0, 4.0, 4.0, 24.0}; - mu_assert("p array fails", cmp_int_array(node->wsum_hess->p, expected_p, 11)); - mu_assert("i array fails", cmp_int_array(node->wsum_hess->i, expected_i, 10)); - mu_assert("x array fails", 
cmp_double_array(node->wsum_hess->x, expected_x, 10)); + mu_assert("sparsity fail", + cmp_sparsity(node->wsum_hess, expected_p, expected_i, 10, 10)); + mu_assert("vals fail", cmp_values(node->wsum_hess, expected_x, 10)); free_expr(node); - free_csr_matrix(Q); + free_CSR_matrix(Q); return 0; }