Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
819d6fa
start with permuted dense struct, without integration
dance858 May 10, 2026
4e0b355
better polymorphism
dance858 May 10, 2026
fa02469
make hessian matrix
dance858 May 10, 2026
680d3ee
dispatch for elementwise chainr rule hessian
dance858 May 10, 2026
10bf7ce
better polymorphism for copying sparsity pattern
dance858 May 11, 2026
b734cd4
remove many to_csr
dance858 May 11, 2026
9a53450
remove memcpy in to_csr
dance858 May 11, 2026
5ae3cfb
add profile for trimmed log reg
dance858 May 11, 2026
d2e84a7
infrastructure for matrix multiplication when one matrix is permuted_…
dance858 May 11, 2026
12ba841
ran formatter
dance858 May 11, 2026
74a0a42
better BTDA for permuted dense x permuted_dense
dance858 May 12, 2026
c700183
more cache friendly BA_pd_csc_fill_values
dance858 May 12, 2026
2592efd
edits
dance858 May 12, 2026
648657e
better permuted dense times CSC function
dance858 May 12, 2026
a409985
better infrastructure permuted dense
dance858 May 12, 2026
e132919
get rid of unnecessary allocation
dance858 May 12, 2026
ed9de26
change name of some functions
dance858 May 12, 2026
9e853b1
run formatter
dance858 May 12, 2026
78d2860
permutation dense preserving transpose implementation via index polym…
dance858 May 12, 2026
ce3e33c
clean up multiply
dance858 May 13, 2026
06a9b0e
clean up matrix.h
dance858 May 13, 2026
3e4ea9f
lazy allocation of dwork in permuted_dense
dance858 May 13, 2026
7e3c23e
Implement block_left_mult on permuted_dense
dance858 May 13, 2026
fe1096a
swap dense matrix for permuted dense and delete dense_matrix
dance858 May 13, 2026
2d421c7
more infrastructure
dance858 May 13, 2026
7375158
better BA_pd_pd_fill_values
dance858 May 13, 2026
bc9aa3b
add permuted dense polymorphism to left matmul
dance858 May 13, 2026
f6afb1b
minor
dance858 May 13, 2026
f8952d8
add test
dance858 May 13, 2026
c36f46a
clean up left_matmul and enable parameter
dance858 May 14, 2026
d74ea3e
fix filename
dance858 May 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions include/atoms/affine.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,7 @@
#define AFFINE_H

#include "expr.h"
#include "subexpr.h"
#include "utils/CSR_Matrix.h"
#include "utils/CSR_matrix.h"

expr *new_add(expr *left, expr *right);
expr *new_neg(expr *child);
Expand All @@ -45,7 +44,7 @@ expr *new_transpose(expr *child);
/* Left matrix multiplication: A @ f(x) where A is a constant sparse matrix.
param_node is NULL for fixed constants. We currently do not support sparse
parameters, so param_node should always be null. */
expr *new_left_matmul(expr *param_node, expr *u, const CSR_Matrix *A);
expr *new_left_matmul(expr *param_node, expr *u, const CSR_matrix *A);

/* Left matrix multiplication: A @ f(x) where A is a constant dense matrix
(in row-major, m x n, with values given by 'data') or a parameter
Expand All @@ -59,7 +58,7 @@ expr *new_left_matmul_dense(expr *param_node, expr *u, int m, int n,
/* Right matrix multiplication: f(x) @ A where A is a constant sparse matrix.
We currently do not support sparse parameters, so param_node should always be
null. */
expr *new_right_matmul(expr *param_node, expr *u, const CSR_Matrix *A);
expr *new_right_matmul(expr *param_node, expr *u, const CSR_matrix *A);

/* Right matrix multiplication: f(x) @ A where A is a constant dense matrix
(in row-major, m x n, with values given by 'data') or a parameter
Expand Down
2 changes: 1 addition & 1 deletion include/atoms/bivariate_full_dom.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

expr *new_elementwise_mult(expr *left, expr *right);

/* Matrix multiplication: Z = X @ Y */
/* matrix multiplication: Z = X @ Y */
expr *new_matmul(expr *x, expr *y);

#endif /* BIVARIATE_FULL_DOM_H */
4 changes: 2 additions & 2 deletions include/atoms/non_elementwise_full_dom.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@

#include "expr.h"
#include "subexpr.h"
#include "utils/CSR_Matrix.h"
#include "utils/CSR_matrix.h"

expr *new_quad_form(expr *child, CSR_Matrix *Q);
expr *new_quad_form(expr *child, CSR_matrix *Q);

/* product of all entries, without axis argument */
expr *new_prod(expr *child);
Expand Down
19 changes: 10 additions & 9 deletions include/expr.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@
#ifndef EXPR_H
#define EXPR_H

#include "utils/CSC_Matrix.h"
#include "utils/CSR_Matrix.h"
#include "utils/CSC_matrix.h"
#include "utils/CSR_matrix.h"
#include "utils/matrix.h"
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
Expand All @@ -44,16 +45,16 @@ typedef struct
{
double *dwork;
int *iwork;
CSC_Matrix *jacobian_csc;
int *csc_work; /* for CSR-CSC conversion */
CSC_matrix *jacobian_csc;
int *csc_work; /* for CSR_matrix-CSC_matrix conversion */

/* jacobian_csc_filled is only used for affine functions to avoid redundant
conversions. Could become relevant for non-affine functions if we start
supporting common subexpressions on the Python side. */
bool jacobian_csc_filled;
double *local_jac_diag; /* cached f'(g(x)) diagonal */
CSR_Matrix *hess_term1; /* Jg^T D Jg workspace */
CSR_Matrix *hess_term2; /* child wsum_hess workspace */
matrix *hess_term1; /* Jg^T D Jg workspace */
matrix *hess_term2; /* child wsum_hess workspace */
} Expr_Work;

/* Base expression node structure */
Expand All @@ -70,8 +71,8 @@ typedef struct expr
// oracle related quantities
// ------------------------------------------------------------------------
double *value;
CSR_Matrix *jacobian;
CSR_Matrix *wsum_hess;
matrix *jacobian;
matrix *wsum_hess;
forward_fn forward;
jacobian_init_fn jacobian_init_impl;
wsum_hess_init_fn wsum_hess_init_impl;
Expand Down Expand Up @@ -110,7 +111,7 @@ void free_expr(expr *node);
void jacobian_init(expr *node);
void wsum_hess_init(expr *node);

/* Initialize CSC form of the Jacobian from the CSR Jacobian.
/* Initialize CSC_matrix form of the Jacobian from the CSR_matrix Jacobian.
* Must be called after jacobian_init. */
void jacobian_csc_init(expr *node);

Expand Down
18 changes: 9 additions & 9 deletions include/old-code/old_CSR.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,29 +18,29 @@
#ifndef OLD_CSR_H
#define OLD_CSR_H

#include "utils/CSR_Matrix.h"
#include "utils/CSR_matrix.h"

/* Build (I_p kron A) = blkdiag(A, A, ..., A) of size (p*A->m) x (p*A->n) */
CSR_Matrix *block_diag_repeat_csr(const CSR_Matrix *A, int p);
CSR_matrix *block_diag_repeat_csr(const CSR_matrix *A, int p);

/* Build (A kron I_p) of size (A->m * p) x (A->n * p) with nnz = A->nnz * p. */
CSR_Matrix *kron_identity_csr(const CSR_Matrix *A, int p);
CSR_matrix *kron_identity_csr(const CSR_matrix *A, int p);

/* Computes values of the row matrix C = z^T A (column indices must have been
pre-computed) and transposed matrix AT must be provided) */
void Ax_csr_fill_values(const CSR_Matrix *AT, const double *z, CSR_Matrix *C);
void Ax_csr_fill_values(const CSR_matrix *AT, const double *z, CSR_matrix *C);

/* Insert value into CSR matrix A with just one row at col_idx. Assumes that A
/* Insert value into CSR_matrix matrix A with just one row at col_idx. Assumes that A
has enough space and that A does not have an element at col_idx. It does update
nnz. */
void csr_insert_value(CSR_Matrix *A, int col_idx, double value);
void csr_insert_value(CSR_matrix *A, int col_idx, double value);

/* Compute C = diag(d) * A where d is an array and A, C are CSR matrices
/* Compute C = diag(d) * A where d is an array and A, C are CSR_matrix matrices
* d must have length m
* C must be pre-allocated with same dimensions as A */
void diag_csr_mult(const double *d, const CSR_Matrix *A, CSR_Matrix *C);
void diag_csr_mult(const double *d, const CSR_matrix *A, CSR_matrix *C);

/* y = Ax, where y is returned as dense (no column offset) */
void Ax_csr_wo_offset(const CSR_Matrix *A, const double *x, double *y);
void Ax_csr_wo_offset(const CSR_matrix *A, const double *x, double *y);

#endif /* OLD_CSR_H */
22 changes: 11 additions & 11 deletions include/old-code/old_CSR_sum.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,44 +18,44 @@
#ifndef OLD_CSR_SUM_H
#define OLD_CSR_SUM_H

#include "utils/CSR_Matrix.h"
#include "utils/CSR_matrix.h"

/* Compute C = A + B where A, B, C are CSR matrices
/* Compute C = A + B where A, B, C are CSR_matrix matrices
* A and B must have same dimensions
* C must be pre-allocated with sufficient nnz capacity.
* C must be different from A and B */
void sum_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C);
void sum_csr_matrices(const CSR_matrix *A, const CSR_matrix *B, CSR_matrix *C);

/* Compute C = diag(d1) * A + diag(d2) * B where A, B, C are CSR matrices */
void sum_scaled_csr_matrices(const CSR_Matrix *A, const CSR_Matrix *B, CSR_Matrix *C,
/* Compute C = diag(d1) * A + diag(d2) * B where A, B, C are CSR_matrix matrices */
void sum_scaled_csr_matrices(const CSR_matrix *A, const CSR_matrix *B, CSR_matrix *C,
const double *d1, const double *d2);

/* forward declaration */
struct int_double_pair;

/* Sum all rows of A into a single row matrix C */
void sum_all_rows_csr(const CSR_Matrix *A, CSR_Matrix *C,
void sum_all_rows_csr(const CSR_matrix *A, CSR_matrix *C,
struct int_double_pair *pairs);

/* Sum blocks of rows of A into a matrix C */
void sum_block_of_rows_csr(const CSR_Matrix *A, CSR_Matrix *C,
void sum_block_of_rows_csr(const CSR_matrix *A, CSR_matrix *C,
struct int_double_pair *pairs, int row_block_size);

/* Sum evenly spaced rows of A into a matrix C */
void sum_evenly_spaced_rows_csr(const CSR_Matrix *A, CSR_Matrix *C,
void sum_evenly_spaced_rows_csr(const CSR_matrix *A, CSR_matrix *C,
struct int_double_pair *pairs, int row_spacing);

/* Sum evenly spaced rows of A starting at offset into a row matrix C */
void sum_spaced_rows_into_row_csr(const CSR_Matrix *A, CSR_Matrix *C,
void sum_spaced_rows_into_row_csr(const CSR_matrix *A, CSR_matrix *C,
struct int_double_pair *pairs, int offset,
int spacing);

/* Fill values of summed rows using precomputed idx_map and sparsity of C */
void sum_all_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C,
void sum_all_rows_csr_fill_values(const CSR_matrix *A, CSR_matrix *C,
const int *idx_map);

/* Fill values of summed block rows using precomputed idx_map */
void sum_block_of_rows_csr_fill_values(const CSR_Matrix *A, CSR_Matrix *C,
void sum_block_of_rows_csr_fill_values(const CSR_matrix *A, CSR_matrix *C,
const int *idx_map);

#endif /* OLD_CSR_SUM_H */
4 changes: 2 additions & 2 deletions include/old-code/old_affine.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
#define OLD_AFFINE_H

#include "expr.h"
#include "utils/CSR_Matrix.h"
#include "utils/CSR_matrix.h"

expr *new_linear(expr *u, const CSR_Matrix *A, const double *b);
expr *new_linear(expr *u, const CSR_matrix *A, const double *b);

#endif /* OLD_AFFINE_H */
76 changes: 76 additions & 0 deletions include/old-code/old_permuted_dense.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
/*
* Copyright 2026 Daniel Cederberg and William Zhang
*
* This file is part of the SparseDiffEngine project.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef OLD_PERMUTED_DENSE_H
#define OLD_PERMUTED_DENSE_H

#include "utils/CSR_matrix.h"
#include "utils/permuted_dense.h"

/* Legacy CSR-based (PD, Sparse) BTA / BTDA kernels.

Mathematically equivalent to BTA_pd_csc_alloc / BTDA_pd_csc_fill_values
in src/utils/permuted_dense.c — they all compute C = B^T (diag(d)) A
for B PD and A sparse. The matrix_BTA dispatcher used to choose between
the CSR-here and CSC-in-utils variants; after a benchmark on
trimmed_log_reg-shaped workloads we committed to CSC and moved these
kernels out of production paths.

Kept here as a reference implementation, as cross-comparison fodder for
tests (test_BTA_pd_csc_matches_csr), and as the CSR side of the
profile_BTA_pd_csr_vs_csc microbenchmark. */

/* Allocate a new permuted_dense for C = B^T A where B is PD and A is
CSR-sparse. Output is PD with row_perm = B->col_perm and col_perm = the
sorted union of columns appearing in A's rows at positions row_perm_B.
Dense block size = (B->n0, |col_active|). Values uninitialized. */
matrix *BTA_pd_csr_alloc(const permuted_dense *B, const CSR_matrix *A);

/* Fill C->X = X_B^T @ A_sub_dense, where A_sub_dense is A's rows at
positions row_perm_B, columns restricted to C's col_perm, scattered to a
dense buffer. C must have the structure produced by BTA_pd_csr_alloc. */
void BTA_pd_csr_fill_values(const permuted_dense *B, const CSR_matrix *A,
permuted_dense *C);

/* BTDA variant: C->X = X_B^T diag(d) A_sub_dense. d may be NULL (treated
as identity scaling). C must have the structure produced by
BTA_pd_csr_alloc. */
void BTDA_pd_csr_fill_values(const permuted_dense *B, const double *d,
const CSR_matrix *A, permuted_dense *C);

/* Legacy CSR-pd kernels (B=CSR, A=PD), formerly in src/utils/permuted_dense.c.
Production now dispatches the (PD A, sparse B) branch through CSC-pd
kernels (BTA_csc_pd_alloc / BTDA_csc_pd_fill_values in utils/permuted_dense.h),
so these CSR variants live here as reference implementations and as
targets for the direct unit tests in tests/old-code. */

/* Allocate a new permuted_dense for C = B^T A where B is CSR-sparse and A
is PD. Output is PD with row_perm = the sorted union of columns appearing
in B's rows at positions row_perm_A, and col_perm = A->col_perm. */
matrix *BTA_csr_pd_alloc(const CSR_matrix *B_csr, const permuted_dense *A);

/* No-d BTA fill. C must have the structure produced by BTA_csr_pd_alloc. */
void BTA_csr_pd_fill_values(const CSR_matrix *B_csr, const permuted_dense *A,
permuted_dense *C);

/* BTDA variant: C->X = B_sub_dense^T diag(d) X_A. d may be NULL (treated
as identity scaling). C must have the structure produced by
BTA_csr_pd_alloc. */
void BTDA_csr_pd_fill_values(const CSR_matrix *B_csr, const double *d,
const permuted_dense *A, permuted_dense *C);

#endif /* OLD_PERMUTED_DENSE_H */
12 changes: 6 additions & 6 deletions include/problem.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
#define PROBLEM_H

#include "expr.h"
#include "utils/COO_Matrix.h"
#include "utils/CSR_Matrix.h"
#include "utils/COO_matrix.h"
#include "utils/CSR_matrix.h"
#include "utils/Timer.h"
#include <stdbool.h>

Expand Down Expand Up @@ -59,11 +59,11 @@ typedef struct problem
double *gradient_values;

/* allocated by problem_init_derivatives */
CSR_Matrix *jacobian;
CSR_Matrix *lagrange_hessian;
CSR_matrix *jacobian;
CSR_matrix *lagrange_hessian;
int *hess_idx_map; /* maps all wsum_hess nnz to lagrange_hessian */
COO_Matrix *jacobian_coo;
COO_Matrix *lagrange_hessian_coo; /* lower triangular part stored in COO */
COO_matrix *jacobian_coo;
COO_matrix *lagrange_hessian_coo; /* lower triangular part stored in COO */

/* for the affine shortcut we keep track of the first time the jacobian and
* hessian are called */
Expand Down
Loading
Loading