From 84d945bce0f2ec9a9472c56c0392bdb61ee56c73 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 14 Apr 2026 12:23:43 -0700 Subject: [PATCH 01/17] first draft --- sparsediffpy/__init__.py | 36 ++ sparsediffpy/_core/__init__.py | 0 sparsediffpy/_core/_compile.py | 353 +++++++++++++++++ sparsediffpy/_core/_constants.py | 81 ++++ sparsediffpy/_core/_expression.py | 242 ++++++++++++ sparsediffpy/_core/_functions.py | 216 +++++++++++ sparsediffpy/_core/_nodes_affine.py | 130 +++++++ sparsediffpy/_core/_nodes_bivariate.py | 41 ++ sparsediffpy/_core/_nodes_elementwise.py | 95 +++++ sparsediffpy/_core/_nodes_other.py | 35 ++ sparsediffpy/_core/_registry.py | 178 +++++++++ sparsediffpy/_core/_scope.py | 133 +++++++ sparsediffpy/_core/_shapes.py | 86 ++++ tests/affine/__init__.py | 0 tests/affine/test_add.py | 128 ++++++ tests/affine/test_diag_vec.py | 22 ++ tests/affine/test_hstack.py | 33 ++ tests/affine/test_index.py | 67 ++++ tests/affine/test_left_matmul.py | 60 +++ tests/affine/test_neg.py | 27 ++ tests/affine/test_reshape.py | 19 + tests/affine/test_right_matmul.py | 33 ++ tests/affine/test_scalar_mult.py | 55 +++ tests/affine/test_sub.py | 39 ++ tests/affine/test_sum.py | 35 ++ tests/affine/test_trace.py | 20 + tests/affine/test_transpose.py | 19 + tests/affine/test_vector_mult.py | 41 ++ tests/affine/test_vstack.py | 32 ++ tests/bivariate_full_dom/__init__.py | 0 tests/bivariate_full_dom/test_matmul.py | 32 ++ tests/bivariate_full_dom/test_multiply.py | 42 ++ tests/complicated/__init__.py | 0 tests/complicated/test_compositions.py | 312 +++++++++++++++ tests/conftest.py | 13 + tests/elementwise_full_dom/__init__.py | 0 tests/elementwise_full_dom/test_asinh.py | 20 + tests/elementwise_full_dom/test_cos.py | 28 ++ tests/elementwise_full_dom/test_exp.py | 44 +++ tests/elementwise_full_dom/test_logistic.py | 30 ++ tests/elementwise_full_dom/test_normal_cdf.py | 29 ++ tests/elementwise_full_dom/test_power.py | 45 +++ tests/elementwise_full_dom/test_sin.py | 36 ++ 
tests/elementwise_full_dom/test_sinh.py | 20 + tests/elementwise_full_dom/test_tanh.py | 20 + tests/elementwise_full_dom/test_xexp.py | 28 ++ tests/elementwise_restricted_dom/__init__.py | 0 .../elementwise_restricted_dom/test_atanh.py | 21 + tests/elementwise_restricted_dom/test_entr.py | 28 ++ tests/elementwise_restricted_dom/test_log.py | 36 ++ tests/elementwise_restricted_dom/test_tan.py | 21 + tests/other/__init__.py | 0 tests/other/test_prod.py | 44 +++ tests/other/test_quad_form.py | 43 ++ tests/other/test_quad_over_lin.py | 34 ++ tests/other/test_rel_entr.py | 34 ++ tests/test_misc.py | 366 ++++++++++++++++++ tests/test_row_vectors.py | 140 +++++++ tests/test_validation.py | 225 +++++++++++ tests/utils.py | 119 ++++++ 60 files changed, 4066 insertions(+) create mode 100644 sparsediffpy/_core/__init__.py create mode 100644 sparsediffpy/_core/_compile.py create mode 100644 sparsediffpy/_core/_constants.py create mode 100644 sparsediffpy/_core/_expression.py create mode 100644 sparsediffpy/_core/_functions.py create mode 100644 sparsediffpy/_core/_nodes_affine.py create mode 100644 sparsediffpy/_core/_nodes_bivariate.py create mode 100644 sparsediffpy/_core/_nodes_elementwise.py create mode 100644 sparsediffpy/_core/_nodes_other.py create mode 100644 sparsediffpy/_core/_registry.py create mode 100644 sparsediffpy/_core/_scope.py create mode 100644 sparsediffpy/_core/_shapes.py create mode 100644 tests/affine/__init__.py create mode 100644 tests/affine/test_add.py create mode 100644 tests/affine/test_diag_vec.py create mode 100644 tests/affine/test_hstack.py create mode 100644 tests/affine/test_index.py create mode 100644 tests/affine/test_left_matmul.py create mode 100644 tests/affine/test_neg.py create mode 100644 tests/affine/test_reshape.py create mode 100644 tests/affine/test_right_matmul.py create mode 100644 tests/affine/test_scalar_mult.py create mode 100644 tests/affine/test_sub.py create mode 100644 tests/affine/test_sum.py create mode 100644 
tests/affine/test_trace.py create mode 100644 tests/affine/test_transpose.py create mode 100644 tests/affine/test_vector_mult.py create mode 100644 tests/affine/test_vstack.py create mode 100644 tests/bivariate_full_dom/__init__.py create mode 100644 tests/bivariate_full_dom/test_matmul.py create mode 100644 tests/bivariate_full_dom/test_multiply.py create mode 100644 tests/complicated/__init__.py create mode 100644 tests/complicated/test_compositions.py create mode 100644 tests/conftest.py create mode 100644 tests/elementwise_full_dom/__init__.py create mode 100644 tests/elementwise_full_dom/test_asinh.py create mode 100644 tests/elementwise_full_dom/test_cos.py create mode 100644 tests/elementwise_full_dom/test_exp.py create mode 100644 tests/elementwise_full_dom/test_logistic.py create mode 100644 tests/elementwise_full_dom/test_normal_cdf.py create mode 100644 tests/elementwise_full_dom/test_power.py create mode 100644 tests/elementwise_full_dom/test_sin.py create mode 100644 tests/elementwise_full_dom/test_sinh.py create mode 100644 tests/elementwise_full_dom/test_tanh.py create mode 100644 tests/elementwise_full_dom/test_xexp.py create mode 100644 tests/elementwise_restricted_dom/__init__.py create mode 100644 tests/elementwise_restricted_dom/test_atanh.py create mode 100644 tests/elementwise_restricted_dom/test_entr.py create mode 100644 tests/elementwise_restricted_dom/test_log.py create mode 100644 tests/elementwise_restricted_dom/test_tan.py create mode 100644 tests/other/__init__.py create mode 100644 tests/other/test_prod.py create mode 100644 tests/other/test_quad_form.py create mode 100644 tests/other/test_quad_over_lin.py create mode 100644 tests/other/test_rel_entr.py create mode 100644 tests/test_misc.py create mode 100644 tests/test_row_vectors.py create mode 100644 tests/test_validation.py create mode 100644 tests/utils.py diff --git a/sparsediffpy/__init__.py b/sparsediffpy/__init__.py index 9b73c36..1f361fc 100644 --- a/sparsediffpy/__init__.py 
# --- sparsediffpy/__init__.py (tail) ---

# C extension (low-level, for advanced users)
from sparsediffpy import _sparsediffengine  # noqa: F401

# Core classes
from sparsediffpy._core._scope import Scope, Variable, Parameter  # noqa: F401
from sparsediffpy._core._expression import Expression  # noqa: F401

# Compile
from sparsediffpy._core._compile import compile  # noqa: F401

# Named functions
from sparsediffpy._core._functions import (  # noqa: F401
    sin, cos, exp, log, tan, sinh, tanh, asinh, atanh,
    logistic, normal_cdf, entr, xexp, diag_vec, power,
    sum, prod, reshape, trace, hstack, vstack,
    quad_form, quad_over_lin, rel_entr,
)

# --- sparsediffpy/_core/_compile.py ---
"""compile() and CompiledExpression.

The recursive tree walker that converts Python expression nodes to C capsules.
Node-type-to-C-call mapping lives in _registry.py.
"""

import numpy as np
import scipy.sparse

from sparsediffpy import _sparsediffengine as _C
from sparsediffpy._core._constants import Constant, SparseConstant
from sparsediffpy._core._nodes_affine import (
    LeftMatMul,
    ParamScalarMult,
    ParamVectorMult,
    RightMatMul,
)
from sparsediffpy._core._registry import (
    ATOM_CONVERTERS,
    _to_dense_row_major,
    make_dense_left_matmul,
    make_dense_right_matmul,
    make_sparse_left_matmul,
    make_sparse_right_matmul,
)
from sparsediffpy._core._scope import Parameter, Variable


def compile(expr):
    """Compile an expression tree into a CompiledExpression.

    Walks the Python expression tree, discovers all Variables and
    Parameters, builds C capsules bottom-up, creates a C problem, and
    initializes the sparsity patterns used for Jacobian and Hessian
    computation.
    """
    # Discover every Variable / Parameter leaf in the tree.
    var_leaves, param_leaves = [], []
    _collect_leaves(expr, var_leaves, param_leaves, set())

    # All variables must share a single Scope; a purely constant
    # expression gets a fresh, empty one.
    scope = None
    for var in var_leaves:
        if scope is None:
            scope = var._scope
        elif var._scope is not scope:
            raise ValueError("All variables must belong to the same Scope")
    if scope is None:
        from sparsediffpy._core._scope import Scope
        scope = Scope()

    # The C layer needs at least one variable slot.
    n_vars = scope._next_var_offset or 1

    # Build C capsules bottom-up; parameter capsules are recorded in
    # discovery order (this order defines the flat theta layout).
    cap_cache = {}
    param_caps_ordered, param_objs_ordered = [], []
    root_cap = _build_capsule(
        expr, n_vars, cap_cache, param_caps_ordered, param_objs_ordered
    )

    # The C problem requires an objective: hand it a scalar zero dummy
    # and install the expression as the single constraint.
    dummy_obj = _C.make_parameter(1, 1, -1, n_vars, np.array([0.0]))
    problem = _C.make_problem(dummy_obj, [root_cap], False)

    if param_caps_ordered:
        _C.problem_register_params(problem, param_caps_ordered)

    # Initialize sparsity patterns once, up front.
    _C.problem_init_jacobian(problem)
    _C.problem_init_hessian(problem)

    return CompiledExpression(
        problem_capsule=problem,
        scope=scope,
        param_capsules=param_caps_ordered,
        param_objects=param_objs_ordered,
        expr_shape=expr.shape,
        n_vars=n_vars,
    )


# ---------------------------------------------------------------------------
# Tree walking
# ---------------------------------------------------------------------------

def _collect_leaves(node, variables, parameters, visited):
    """Walk the expression tree, appending Variable and Parameter leaves.

    ``visited`` holds id()s of already-seen nodes so shared subtrees are
    walked only once.  Slot order below is significant: it fixes the
    order in which leaves are discovered.
    """
    if id(node) in visited:
        return
    visited.add(id(node))

    if isinstance(node, Variable):
        variables.append(node)
        return
    if isinstance(node, Parameter):
        parameters.append(node)
        return
    if isinstance(node, (Constant, SparseConstant)):
        return

    # Recurse into whichever child slots this node type carries.
    for slot in ("child", "left", "right"):
        if hasattr(node, slot):
            _collect_leaves(getattr(node, slot), variables, parameters, visited)
    if hasattr(node, "x") and hasattr(node, "z"):
        _collect_leaves(node.x, variables, parameters, visited)
        _collect_leaves(node.z, variables, parameters, visited)
    elif hasattr(node, "x") and hasattr(node, "y"):
        _collect_leaves(node.x, variables, parameters, visited)
        _collect_leaves(node.y, variables, parameters, visited)
    for slot in ("param_expr", "matrix_expr"):
        if hasattr(node, slot):
            _collect_leaves(getattr(node, slot), variables, parameters, visited)
    if hasattr(node, "children"):
        for sub in node.children:
            _collect_leaves(sub, variables, parameters, visited)


# ---------------------------------------------------------------------------
# Capsule building
# ---------------------------------------------------------------------------

def _build_capsule(node, n_vars, cache, param_caps, param_objs):
    """Recursively build (and memoize) the C capsule for ``node``.

    Args:
        node: Python expression node.
        n_vars: total number of scalar variables in the scope.
        cache: dict mapping id(node) -> capsule, so shared subtrees are
            converted only once.
        param_caps, param_objs: parameter capsules / Parameter objects,
            appended in discovery order.

    Raises:
        ValueError: if the C layer reports different dimensions than the
            Python node claims.
    """
    node_id = id(node)
    if node_id in cache:
        return cache[node_id]

    cap = _convert_node(node, n_vars, cache, param_caps, param_objs)

    # Post-conversion sanity check: C and Python must agree on shape.
    d1_c, d2_c = _C.get_expr_dimensions(cap)
    d1_py, d2_py = node.shape
    if d1_c != d1_py or d2_c != d2_py:
        raise ValueError(
            f"Dimension mismatch for {type(node).__name__}: "
            f"C dimensions ({d1_c}, {d2_c}) vs Python dimensions ({d1_py}, {d2_py})"
        )

    cache[node_id] = cap
    return cap


def _convert_node(node, n_vars, cache, param_caps, param_objs):
    """Convert a single Python expression node to a C capsule."""

    # --- Leaves ---
    if isinstance(node, Variable):
        return _C.make_variable(
            node.shape[0], node.shape[1], node._var_id, n_vars
        )

    if isinstance(node, Parameter):
        cap = _C.make_parameter(
            node.shape[0], node.shape[1], node._param_id, n_vars,
            node._value_flat,
        )
        param_caps.append(cap)
        param_objs.append(node)
        return cap

    if isinstance(node, Constant):
        # Constants are modeled as anonymous parameters (id -1).
        return _C.make_parameter(
            node.shape[0], node.shape[1], -1, n_vars, node._value_flat
        )

    if isinstance(node, SparseConstant):
        return _C.make_parameter(
            node.shape[0], node.shape[1], -1, n_vars, node._to_dense_flat()
        )

    # --- Matmul and multiply with parameter dispatch ---
    # These need special handling because they access matrix_expr /
    # param_expr directly rather than a uniform children list.
    if isinstance(node, LeftMatMul):
        return _convert_left_matmul(node, n_vars, cache, param_caps, param_objs)

    if isinstance(node, RightMatMul):
        return _convert_right_matmul(node, n_vars, cache, param_caps, param_objs)

    if isinstance(node, ParamScalarMult):
        param_cap = _build_capsule(node.param_expr, n_vars, cache, param_caps, param_objs)
        child_cap = _build_capsule(node.child, n_vars, cache, param_caps, param_objs)
        return _C.make_param_scalar_mult(param_cap, child_cap)

    if isinstance(node, ParamVectorMult):
        param_cap = _build_capsule(node.param_expr, n_vars, cache, param_caps, param_objs)
        child_cap = _build_capsule(node.child, n_vars, cache, param_caps, param_objs)
        return _C.make_param_vector_mult(param_cap, child_cap)

    # --- Registry lookup for every remaining node type ---
    converter = ATOM_CONVERTERS.get(type(node))
    if converter is not None:
        child_caps = _build_children(node, n_vars, cache, param_caps, param_objs)
        return converter(node, child_caps)

    raise TypeError(f"Unknown expression node type: {type(node).__name__}")


def _build_children(node, n_vars, cache, param_caps, param_objs):
    """Build C capsules for all children of a node, returned as a list.

    Slot order (child; left/right; x with z or y; children) determines
    the argument order handed to the registry converters.
    """
    caps = []
    # Unary: .child
    if hasattr(node, "child"):
        caps.append(_build_capsule(node.child, n_vars, cache, param_caps, param_objs))
    # Binary: .left, .right
    if hasattr(node, "left"):
        caps.append(_build_capsule(node.left, n_vars, cache, param_caps, param_objs))
    if hasattr(node, "right"):
        caps.append(_build_capsule(node.right, n_vars, cache, param_caps, param_objs))
    # QuadOverLin/RelEntr style: .x with .z or .y.  The ``not caps``
    # guard keeps a node that already matched a slot above from being
    # walked twice.
    if hasattr(node, "x") and not caps:
        caps.append(_build_capsule(node.x, n_vars, cache, param_caps, param_objs))
        if hasattr(node, "z"):
            caps.append(_build_capsule(node.z, n_vars, cache, param_caps, param_objs))
        elif hasattr(node, "y"):
            caps.append(_build_capsule(node.y, n_vars, cache, param_caps, param_objs))
    # Variadic: .children (e.g. HStack)
    if hasattr(node, "children"):
        for sub in node.children:
            caps.append(_build_capsule(sub, n_vars, cache, param_caps, param_objs))
    return caps


# ---------------------------------------------------------------------------
# Left/right matmul converters
# ---------------------------------------------------------------------------

def _convert_left_matmul(node, n_vars, cache, param_caps, param_objs):
    """Convert A @ f(x)."""
    return _convert_matmul(
        node, n_vars, cache, param_caps, param_objs,
        make_sparse_left_matmul, make_dense_left_matmul, "LeftMatMul",
    )


def _convert_right_matmul(node, n_vars, cache, param_caps, param_objs):
    """Convert f(x) @ A."""
    return _convert_matmul(
        node, n_vars, cache, param_caps, param_objs,
        make_sparse_right_matmul, make_dense_right_matmul, "RightMatMul",
    )


def _convert_matmul(node, n_vars, cache, param_caps, param_objs,
                    make_sparse, make_dense, kind):
    """Shared body of the left/right matmul converters.

    The matrix operand must be a Constant, SparseConstant, or Parameter;
    only a Parameter contributes a capsule of its own.
    """
    child_cap = _build_capsule(node.child, n_vars, cache, param_caps, param_objs)
    matrix = node.matrix_expr

    if isinstance(matrix, SparseConstant):
        return make_sparse(None, child_cap, matrix)

    m, n = matrix.shape
    if isinstance(matrix, Parameter):
        param_cap = _build_capsule(matrix, n_vars, cache, param_caps, param_objs)
        return make_dense(
            param_cap, child_cap, _to_dense_row_major(matrix), m, n
        )
    if isinstance(matrix, Constant):
        return make_dense(
            None, child_cap, _to_dense_row_major(matrix), m, n
        )

    raise TypeError(f"{kind} matrix must be Constant, SparseConstant, or Parameter")


# ---------------------------------------------------------------------------
# CompiledExpression
# ---------------------------------------------------------------------------

class CompiledExpression:
    """A compiled expression ready for evaluation.

    Reads variable values from the scope's flat buffer.
    Reads parameter values from the Parameter objects.
    """

    def __init__(self, problem_capsule, scope, param_capsules, param_objects,
                 expr_shape, n_vars):
        self._problem = problem_capsule        # C problem capsule
        self._scope = scope                    # owning Scope (flat variable buffer)
        self._param_capsules = param_capsules  # C parameter capsules, discovery order
        self._param_objects = param_objects    # matching Python Parameter objects
        self._expr_shape = expr_shape          # (d1, d2) of the compiled expression
        self._n_vars = n_vars                  # total scalar variable count

    def _sync_params(self):
        """Push current parameter values to the C problem."""
        if not self._param_objects:
            return
        # theta layout follows parameter discovery order from compile().
        theta = np.concatenate([p._value_flat for p in self._param_objects])
        _C.problem_update_params(self._problem, theta)

    def _set_point(self):
        """Push variable values and run the forward pass."""
        self._sync_params()
        _C.problem_objective_forward(self._problem, self._scope._flat_values)
        _C.problem_constraint_forward(self._problem, self._scope._flat_values)

    def forward(self):
        """Evaluate the expression at the current variable values."""
        self._set_point()
        result = _C.problem_constraint_forward(self._problem, self._scope._flat_values)
        return result

    def jacobian(self):
        """Compute the sparse Jacobian at the current variable values.

        Returns scipy.sparse.csr_matrix of shape (expr_size, n_vars).
        """
        self._set_point()
        data, indices, indptr, (m, n) = _C.problem_jacobian(self._problem)
        return scipy.sparse.csr_matrix((data, indices, indptr), shape=(m, n))

    def hessian(self, weights):
        """Compute the sparse Hessian of the weighted expression.

        The Hessian is of the scalar function w^T f(x), where w is the
        weights vector and f is the compiled expression.

        Args:
            weights: array of length expr_size

        Returns scipy.sparse.csr_matrix of shape (n_vars, n_vars).
        """
        weights = np.asarray(weights, dtype=np.float64).ravel()
        self._set_point()
        # Result discarded: presumably populates C-side state the
        # Hessian pass depends on — TODO confirm against the C API.
        _C.problem_jacobian(self._problem)
        data, indices, indptr, (m, n) = _C.problem_hessian(
            self._problem, 0.0, weights
        )
        return scipy.sparse.csr_matrix((data, indices, indptr), shape=(m, n))


# --- sparsediffpy/_core/_constants.py ---
"""Constant and SparseConstant expression nodes, plus _wrap_constant."""

import numpy as np
import scipy.sparse


class Constant:
    """A fixed dense constant in the expression tree.

    Stores values in column-major (Fortran) flat order to match the C layer.
    """

    # Keep NumPy from hijacking arithmetic with our expression operators.
    __array_ufunc__ = None
    __array_priority__ = 20

    def __init__(self, value, shape):
        """Store ``value`` flattened in Fortran order.

        Raises:
            ValueError: if value's element count does not match ``shape``.
        """
        self.shape = shape
        self._value_flat = np.asarray(value, dtype=np.float64).ravel(order="F")
        expected_size = shape[0] * shape[1]
        if self._value_flat.size != expected_size:
            raise ValueError(
                f"Constant value has {self._value_flat.size} elements, "
                f"expected {expected_size} for shape {shape}"
            )


class SparseConstant:
    """A fixed sparse constant in the expression tree.

    Stores CSR arrays for use with make_left_matmul / make_right_matmul.
    """

    __array_ufunc__ = None
    __array_priority__ = 20

    def __init__(self, csr_matrix):
        csr = scipy.sparse.csr_matrix(csr_matrix)
        self.shape = (csr.shape[0], csr.shape[1])
        self._csr_data = np.asarray(csr.data, dtype=np.float64)
        self._csr_indices = np.asarray(csr.indices, dtype=np.int32)
        self._csr_indptr = np.asarray(csr.indptr, dtype=np.int32)

    def _to_dense_flat(self):
        """Convert to dense column-major flat array (for standalone use)."""
        dense = scipy.sparse.csr_matrix(
            (self._csr_data, self._csr_indices, self._csr_indptr),
            shape=self.shape,
        ).toarray()
        return dense.ravel(order="F").astype(np.float64)


def _wrap_constant(value):
    """Wrap a raw value into an expression node.

    - Expression subclass -> return as-is
    - int / float / NumPy scalar -> Constant with shape (1, 1)
    - np.ndarray 1D (n,) -> Constant with shape (n, 1) (column vector)
    - np.ndarray 2D (m, n) -> Constant with shape (m, n)
    - scipy.sparse -> SparseConstant

    Raises:
        ValueError: for arrays with more than 2 dimensions.
        TypeError: for any other input type.
    """
    # Avoid circular import: check for Expression base by duck-typing
    # (has the _is_sparsediff_expr marker attribute).
    if hasattr(value, "_is_sparsediff_expr"):
        return value

    # Also accept NumPy scalar types (np.int64, np.float32, ...):
    # np.floating subclasses reach here via float only on some dtypes,
    # and np.integer is not a subclass of int at all.
    if isinstance(value, (int, float, np.integer, np.floating)):
        return Constant(np.array([float(value)]), (1, 1))

    if isinstance(value, np.ndarray):
        if value.ndim == 0:
            return Constant(np.array([value.item()]), (1, 1))
        if value.ndim == 1:
            return Constant(value, (value.shape[0], 1))
        if value.ndim == 2:
            return Constant(value, (value.shape[0], value.shape[1]))
        raise ValueError(f"Cannot wrap {value.ndim}D array as constant")

    if scipy.sparse.issparse(value):
        return SparseConstant(value)

    raise TypeError(f"Cannot convert {type(value).__name__} to expression")
# --- sparsediffpy/_core/_expression.py ---
"""Expression base class, operator overloading, and dispatch helpers.

Node types are defined in _nodes_affine.py, _nodes_elementwise.py,
_nodes_bivariate.py, and _nodes_other.py.
"""

import numpy as np

from sparsediffpy._core._constants import (
    Constant,
    SparseConstant,
    _wrap_constant,
)
from sparsediffpy._core._shapes import (
    broadcast_shape,
    check_matmul_shapes,
    is_scalar,
)


class Expression:
    """Base class for all expression tree nodes."""

    # Duck-typing marker used by _wrap_constant / _ensure_expr.
    _is_sparsediff_expr = True
    # (d1, d2), set by subclasses.
    shape = None

    # Tell NumPy to defer to our operators instead of trying its own.
    __array_ufunc__ = None
    __array_priority__ = 20

    def __add__(self, other):
        other = _wrap_constant(other)
        return _make_add(self, other)

    def __radd__(self, other):
        other = _wrap_constant(other)
        return _make_add(other, self)

    def __sub__(self, other):
        # a - b is implemented as a + (-b).
        other = _wrap_constant(other)
        from sparsediffpy._core._nodes_affine import Neg
        return _make_add(self, Neg(other))

    def __rsub__(self, other):
        other = _wrap_constant(other)
        from sparsediffpy._core._nodes_affine import Neg
        return _make_add(other, Neg(self))

    def __neg__(self):
        from sparsediffpy._core._nodes_affine import Neg
        return Neg(self)

    def __mul__(self, other):
        other = _wrap_constant(other)
        return _make_mul(self, other)

    def __rmul__(self, other):
        other = _wrap_constant(other)
        return _make_mul(other, self)

    def __matmul__(self, other):
        other = _wrap_constant(other)
        return _make_matmul(self, other)

    def __rmatmul__(self, other):
        other = _wrap_constant(other)
        return _make_matmul(other, self)

    def __pow__(self, exponent):
        """Elementwise power with a constant numeric exponent."""
        if not isinstance(exponent, (int, float)):
            raise TypeError("Exponent must be a constant number")
        from sparsediffpy._core._nodes_elementwise import Power
        return Power(self, float(exponent))

    def __getitem__(self, key):
        return _make_index(self, key)

    @property
    def T(self):
        """Transpose of this expression."""
        from sparsediffpy._core._nodes_affine import Transpose
        return Transpose(self)

    @property
    def size(self):
        """Total number of scalar entries (d1 * d2)."""
        return self.shape[0] * self.shape[1]


# ---------------------------------------------------------------------------
# Make Constant and SparseConstant behave as expressions
# ---------------------------------------------------------------------------
# They cannot subclass Expression directly — _expression imports from
# _constants, so subclassing would presumably create an import cycle
# (TODO confirm) — so the operator protocol is grafted on here instead.

for _cls in (Constant, SparseConstant):
    _cls._is_sparsediff_expr = True
    _cls.__array_ufunc__ = None
    _cls.__array_priority__ = 20
    _cls.__add__ = Expression.__add__
    _cls.__radd__ = Expression.__radd__
    _cls.__sub__ = Expression.__sub__
    _cls.__rsub__ = Expression.__rsub__
    _cls.__neg__ = Expression.__neg__
    _cls.__mul__ = Expression.__mul__
    _cls.__rmul__ = Expression.__rmul__
    _cls.__matmul__ = Expression.__matmul__
    _cls.__rmatmul__ = Expression.__rmatmul__
    _cls.__pow__ = Expression.__pow__
    _cls.__getitem__ = Expression.__getitem__
    _cls.T = Expression.T
    _cls.size = Expression.size


# ---------------------------------------------------------------------------
# Operator dispatch helpers
# ---------------------------------------------------------------------------

def _maybe_broadcast(node, target_shape):
    """Wrap ``node`` in a Broadcast unless it already has target_shape."""
    from sparsediffpy._core._nodes_affine import Broadcast
    if node.shape == target_shape:
        return node
    return Broadcast(node, target_shape)


def _is_param_like(node):
    """True for leaves with fixed/parametric values (no variables)."""
    from sparsediffpy._core._scope import Parameter
    return isinstance(node, (Constant, SparseConstant, Parameter))


def _make_add(left, right):
    """Build an Add node, broadcasting either side as needed."""
    from sparsediffpy._core._nodes_affine import Add
    result_shape, left_bc, right_bc = broadcast_shape(left.shape, right.shape)
    if left_bc:
        left = _maybe_broadcast(left, result_shape)
    if right_bc:
        right = _maybe_broadcast(right, result_shape)
    return Add(left, right)


def _make_mul(left, right):
    """Build an elementwise product node.

    Prefers the cheaper Param*Mult nodes when one operand is
    parameter-like; falls back to the general Multiply node only when
    both operands contain variables.
    """
    from sparsediffpy._core._nodes_affine import ParamScalarMult, ParamVectorMult
    from sparsediffpy._core._nodes_bivariate import Multiply

    if _is_param_like(left) and is_scalar(left.shape):
        return ParamScalarMult(left, right)
    if _is_param_like(right) and is_scalar(right.shape):
        return ParamScalarMult(right, left)

    if _is_param_like(left) and left.shape == right.shape:
        return ParamVectorMult(left, right)
    if _is_param_like(right) and right.shape == left.shape:
        return ParamVectorMult(right, left)

    result_shape, left_bc, right_bc = broadcast_shape(left.shape, right.shape)
    if left_bc:
        left = _maybe_broadcast(left, result_shape)
    if right_bc:
        right = _maybe_broadcast(right, result_shape)

    if _is_param_like(left):
        return ParamVectorMult(left, right)
    if _is_param_like(right):
        return ParamVectorMult(right, left)

    return Multiply(left, right)


def _make_matmul(left, right):
    """Build a matrix product node.

    A parameter-like operand on exactly one side selects the specialized
    Left/RightMatMul nodes; otherwise the general MatMul is used.
    """
    from sparsediffpy._core._nodes_affine import LeftMatMul, RightMatMul
    from sparsediffpy._core._nodes_bivariate import MatMul

    result_shape = check_matmul_shapes(left.shape, right.shape)
    left_is_param = _is_param_like(left)
    right_is_param = _is_param_like(right)

    if left_is_param and not right_is_param:
        return LeftMatMul(left, right, result_shape)
    if right_is_param and not left_is_param:
        return RightMatMul(right, left, result_shape)
    return MatMul(left, right, result_shape)


def _make_index(node, key):
    """Build an Index node from a NumPy-style subscript.

    Flat offsets are column-major (Fortran order), matching the C layout
    used by Constant._value_flat.
    """
    from sparsediffpy._core._nodes_affine import Index

    d1, d2 = node.shape

    if isinstance(key, tuple):
        if len(key) != 2:
            raise IndexError("Only 1D or 2D indexing supported")
        row_key, col_key = key
        row_indices = _resolve_axis_index(row_key, d1)
        col_indices = _resolve_axis_index(col_key, d2)
        # Column-major flat offsets; column loop outermost.
        flat_indices = [r + c * d1 for c in col_indices for r in row_indices]
        out_d1 = len(row_indices)
        out_d2 = len(col_indices)
    else:
        if d2 == 1:
            # Column vector: a single index selects rows.
            indices = _resolve_axis_index(key, d1)
            flat_indices = indices
            out_d1 = len(indices)
            out_d2 = 1
        elif d1 == 1:
            # Row vector: a single index selects columns.
            indices = _resolve_axis_index(key, d2)
            flat_indices = [i * d1 for i in indices]
            out_d1 = 1
            out_d2 = len(indices)
        else:
            # General matrix with one index: flat column-major selection,
            # producing a column vector.
            total = d1 * d2
            indices = _resolve_axis_index(key, total)
            flat_indices = indices
            out_d1 = len(indices)
            out_d2 = 1

    result_shape = (out_d1, out_d2)
    flat_arr = np.array(flat_indices, dtype=np.int32)
    return Index(node, flat_arr, result_shape)


def _resolve_axis_index(key, length):
    """Normalize an int / slice / sequence index along one axis.

    Returns a list of non-negative in-range indices.

    Raises:
        IndexError: for out-of-range values or unsupported key types.
    """
    if isinstance(key, (int, np.integer)):
        idx = int(key)
        if idx < 0:
            idx += length
        if idx < 0 or idx >= length:
            raise IndexError(f"Index {key} out of range for axis of length {length}")
        return [idx]
    if isinstance(key, slice):
        return list(range(*key.indices(length)))
    if isinstance(key, (list, np.ndarray)):
        out = []
        for i in key:
            idx = int(i)
            if idx < 0:
                idx += length
            if idx < 0 or idx >= length:
                raise IndexError(f"Index {i} out of range for axis of length {length}")
            out.append(idx)
        return out
    raise IndexError(f"Unsupported index type: {type(key).__name__}")


# --- sparsediffpy/_core/_functions.py ---
"""Module-level named functions: sp.sin, sp.exp, sp.hstack, etc."""

import numpy as np
import scipy.sparse

from sparsediffpy._core._constants import _wrap_constant
from sparsediffpy._core._nodes_affine import (
    DiagVec, HStack, Reshape, Sum, Trace, Transpose,
)
from sparsediffpy._core._nodes_bivariate import QuadOverLin, RelEntr
from sparsediffpy._core._nodes_elementwise import (
    Asinh, Atanh, Cos, Entr, Exp, Log, Logistic, NormalCdf, Power,
    Sin, Sinh, Tan, Tanh, Xexp,
)
from sparsediffpy._core._nodes_other import Prod, ProdAxisOne, ProdAxisZero, QuadForm
from sparsediffpy._core._shapes import validate_shape


def _ensure_expr(x):
    """Pass expression nodes through; wrap raw values as constants."""
    if hasattr(x, "_is_sparsediff_expr"):
        return x
    return _wrap_constant(x)
# ---------------------------------------------------------------------------
# Unary elementwise functions
# ---------------------------------------------------------------------------

# Alias to Python's built-in sum, which the module-level ``sum`` below
# shadows.  Defined before first use: the original bound it *after*
# hstack/vstack, which only worked because name lookup happens at call
# time.
import builtins as _builtins
builtins_sum = _builtins.sum


def sin(x):
    """Elementwise sine."""
    return Sin(_ensure_expr(x))


def cos(x):
    """Elementwise cosine."""
    return Cos(_ensure_expr(x))


def exp(x):
    """Elementwise exponential."""
    return Exp(_ensure_expr(x))


def log(x):
    """Elementwise natural logarithm."""
    return Log(_ensure_expr(x))


def tan(x):
    """Elementwise tangent."""
    return Tan(_ensure_expr(x))


def sinh(x):
    """Elementwise hyperbolic sine."""
    return Sinh(_ensure_expr(x))


def tanh(x):
    """Elementwise hyperbolic tangent."""
    return Tanh(_ensure_expr(x))


def asinh(x):
    """Elementwise inverse hyperbolic sine."""
    return Asinh(_ensure_expr(x))


def atanh(x):
    """Elementwise inverse hyperbolic tangent."""
    return Atanh(_ensure_expr(x))


def logistic(x):
    """Elementwise logistic (sigmoid) function."""
    return Logistic(_ensure_expr(x))


def normal_cdf(x):
    """Elementwise standard normal CDF."""
    return NormalCdf(_ensure_expr(x))


def entr(x):
    """Elementwise entropy function."""
    return Entr(_ensure_expr(x))


def xexp(x):
    """Elementwise x * exp(x)."""
    return Xexp(_ensure_expr(x))


def diag_vec(x):
    """Diagonal matrix from a column vector."""
    return DiagVec(_ensure_expr(x))


# ---------------------------------------------------------------------------
# Unary with extra arguments
# ---------------------------------------------------------------------------

def power(x, p):
    """Elementwise power with constant exponent ``p``."""
    return Power(_ensure_expr(x), float(p))


def sum(x, axis=None):
    """Sum reduction.

    axis=None: sum all elements -> (1,1)
    axis=0: sum along rows (collapse d1) -> (1, d2)
    axis=1: sum along columns (collapse d2) -> (d1, 1)

    Raises:
        ValueError: for any other axis (consistent with prod()).
    """
    if axis not in (None, 0, 1):
        raise ValueError(f"Invalid axis {axis}, must be None, 0, or 1")
    # The C layer encodes "all elements" as axis -1.
    c_axis = -1 if axis is None else axis
    return Sum(_ensure_expr(x), c_axis)


def prod(x, axis=None):
    """Product reduction.

    axis=None: product of all elements -> (1,1)
    axis=0: product along rows -> (1, d2)
    axis=1: product along columns -> (d1, 1)
    """
    x = _ensure_expr(x)
    if axis is None:
        return Prod(x)
    elif axis == 0:
        return ProdAxisZero(x)
    elif axis == 1:
        return ProdAxisOne(x)
    else:
        raise ValueError(f"Invalid axis {axis}, must be None, 0, or 1")


def reshape(x, d1, d2):
    """Reshape to (d1, d2); total element count must be unchanged."""
    validate_shape(d1, d2)
    return Reshape(_ensure_expr(x), (d1, d2))


def trace(x):
    """Trace of a square matrix expression."""
    return Trace(_ensure_expr(x))


# ---------------------------------------------------------------------------
# Structural
# ---------------------------------------------------------------------------

def hstack(expressions):
    """Horizontally stack expressions. All must have the same d1 (rows).

    Result shape: (d1, sum of all d2).

    Raises:
        ValueError: on an empty list or mismatched row counts.
    """
    exprs = [_ensure_expr(e) for e in expressions]
    if not exprs:
        raise ValueError("hstack requires at least one expression")

    d1 = exprs[0].shape[0]
    for e in exprs[1:]:
        if e.shape[0] != d1:
            raise ValueError(
                f"hstack: all expressions must have the same number of rows, "
                f"got {d1} and {e.shape[0]}"
            )

    total_d2 = builtins_sum(e.shape[1] for e in exprs)
    return HStack(exprs, (d1, total_d2))


def vstack(expressions):
    """Vertically stack expressions. All must have the same d2 (columns).

    Implemented as transpose(hstack(transpose(each))).

    Raises:
        ValueError: on an empty list or mismatched column counts.
    """
    exprs = [_ensure_expr(e) for e in expressions]
    if not exprs:
        raise ValueError("vstack requires at least one expression")

    d2 = exprs[0].shape[1]
    for e in exprs[1:]:
        if e.shape[1] != d2:
            raise ValueError(
                f"vstack: all expressions must have the same number of columns, "
                f"got {d2} and {e.shape[1]}"
            )

    transposed = [Transpose(e) for e in exprs]
    total_d1 = builtins_sum(e.shape[0] for e in exprs)
    h = HStack(transposed, (d2, total_d1))
    return Transpose(h)


# ---------------------------------------------------------------------------
# Special functions
# ---------------------------------------------------------------------------

def quad_form(x, Q):
    """Quadratic form x' Q x.

    x must be a column vector (n, 1).
    Q must be a scipy.sparse matrix or np.ndarray of shape (n, n).

    Raises:
        ValueError: if x is not a column vector or Q has the wrong shape.
    """
    x = _ensure_expr(x)
    if x.shape[1] != 1:
        raise ValueError(f"quad_form: x must be a column vector, got shape {x.shape}")

    # Normalize Q to CSR for the C layer.
    if not scipy.sparse.issparse(Q):
        Q = scipy.sparse.csr_matrix(Q)
    else:
        Q = Q.tocsr()

    n = x.shape[0]
    if Q.shape != (n, n):
        raise ValueError(
            f"quad_form: Q shape {Q.shape} doesn't match x shape {x.shape}"
        )

    return QuadForm(
        x,
        Q_csr_data=np.asarray(Q.data, dtype=np.float64),
        Q_csr_indices=np.asarray(Q.indices, dtype=np.int32),
        Q_csr_indptr=np.asarray(Q.indptr, dtype=np.int32),
        Q_shape=Q.shape,
    )


def quad_over_lin(x, z):
    """sum(x^2) / z where z is a scalar expression."""
    x = _ensure_expr(x)
    z = _ensure_expr(z)
    return QuadOverLin(x, z)


def rel_entr(x, y):
    """x * log(x / y) elementwise."""
    x = _ensure_expr(x)
    y = _ensure_expr(y)
    return RelEntr(x, y)
b/sparsediffpy/_core/_nodes_affine.py @@ -0,0 +1,130 @@ +"""Affine expression nodes: linear/affine operations on expressions.""" + +import numpy as np + +from sparsediffpy._core._expression import Expression + + +class _UnaryOp(Expression): + def __init__(self, child): + self.child = child + self.shape = child.shape + + +class Neg(_UnaryOp): + pass + + +class Transpose(Expression): + def __init__(self, child): + self.child = child + self.shape = (child.shape[1], child.shape[0]) + + +class DiagVec(Expression): + """Create a diagonal matrix from a column vector (n,1) -> (n,n).""" + def __init__(self, child): + if child.shape[1] != 1: + raise ValueError(f"diag_vec requires a column vector, got shape {child.shape}") + self.child = child + self.shape = (child.shape[0], child.shape[0]) + + +class Trace(Expression): + def __init__(self, child): + if child.shape[0] != child.shape[1]: + raise ValueError(f"trace requires a square matrix, got shape {child.shape}") + self.child = child + self.shape = (1, 1) + + +class Reshape(Expression): + def __init__(self, child, new_shape): + old_size = child.shape[0] * child.shape[1] + new_size = new_shape[0] * new_shape[1] + if old_size != new_size: + raise ValueError( + f"Cannot reshape {child.shape} (size {old_size}) to " + f"{new_shape} (size {new_size})" + ) + self.child = child + self.shape = new_shape + + +class Broadcast(Expression): + """Broadcast scalar/row/column to a target shape.""" + def __init__(self, child, target_shape): + self.child = child + self.shape = target_shape + + +class Sum(Expression): + """Sum reduction. 
C layer always returns row vectors: + axis=-1: (1,1), axis=0: (1,d2), axis=1: (1,d1).""" + def __init__(self, child, axis): + self.child = child + self.axis = axis + d1, d2 = child.shape + if axis == -1: + self.shape = (1, 1) + elif axis == 0: + self.shape = (1, d2) + elif axis == 1: + self.shape = (1, d1) + else: + raise ValueError(f"Invalid axis {axis}, must be -1, 0, or 1") + + +class Add(Expression): + def __init__(self, left, right): + assert left.shape == right.shape, f"Add shape mismatch: {left.shape} vs {right.shape}" + self.left = left + self.right = right + self.shape = left.shape + + +class HStack(Expression): + """Horizontal concatenation.""" + def __init__(self, children, result_shape): + self.children = children + self.shape = result_shape + + +class Index(Expression): + """Indexing with flat column-major indices.""" + def __init__(self, child, flat_indices, result_shape): + self.child = child + self.flat_indices = flat_indices + self.shape = result_shape + + +class ParamScalarMult(Expression): + """a * f(x) where a is a scalar constant/parameter.""" + def __init__(self, param_expr, child): + self.param_expr = param_expr + self.child = child + self.shape = child.shape + + +class ParamVectorMult(Expression): + """a . 
f(x) elementwise where a is a constant/parameter of matching shape.""" + def __init__(self, param_expr, child): + self.param_expr = param_expr + self.child = child + self.shape = child.shape + + +class LeftMatMul(Expression): + """A @ f(x) where A is a constant/sparse constant/parameter matrix.""" + def __init__(self, matrix_expr, child, result_shape): + self.matrix_expr = matrix_expr + self.child = child + self.shape = result_shape + + +class RightMatMul(Expression): + """f(x) @ A where A is a constant/sparse constant/parameter matrix.""" + def __init__(self, matrix_expr, child, result_shape): + self.matrix_expr = matrix_expr + self.child = child + self.shape = result_shape diff --git a/sparsediffpy/_core/_nodes_bivariate.py b/sparsediffpy/_core/_nodes_bivariate.py new file mode 100644 index 0000000..3ecd70a --- /dev/null +++ b/sparsediffpy/_core/_nodes_bivariate.py @@ -0,0 +1,41 @@ +"""Bivariate expression nodes: operations on two variable-dependent expressions.""" + +from sparsediffpy._core._expression import Expression +from sparsediffpy._core._shapes import is_scalar + + +class Multiply(Expression): + """Elementwise multiply (both operands are variable-dependent).""" + def __init__(self, left, right): + assert left.shape == right.shape, f"Multiply shape mismatch: {left.shape} vs {right.shape}" + self.left = left + self.right = right + self.shape = left.shape + + +class MatMul(Expression): + """Matrix multiply where both operands are variable-dependent.""" + def __init__(self, left, right, result_shape): + self.left = left + self.right = right + self.shape = result_shape + + +class QuadOverLin(Expression): + """sum(x^2) / z where z is a scalar.""" + def __init__(self, x, z): + if not is_scalar(z.shape): + raise ValueError(f"quad_over_lin: z must be scalar, got shape {z.shape}") + self.x = x + self.z = z + self.shape = (1, 1) + + +class RelEntr(Expression): + """x * log(x / y) elementwise.""" + def __init__(self, x, y): + if x.shape != y.shape: + raise 
ValueError(f"rel_entr: shape mismatch {x.shape} vs {y.shape}") + self.x = x + self.y = y + self.shape = x.shape diff --git a/sparsediffpy/_core/_nodes_elementwise.py b/sparsediffpy/_core/_nodes_elementwise.py new file mode 100644 index 0000000..74d89e7 --- /dev/null +++ b/sparsediffpy/_core/_nodes_elementwise.py @@ -0,0 +1,95 @@ +"""Elementwise expression nodes: unary operations applied element-by-element.""" + +from sparsediffpy._core._expression import Expression +from sparsediffpy._core._nodes_affine import _UnaryOp, Index + + +# --- Full domain --- + +class Exp(_UnaryOp): + pass + + +class Sin(_UnaryOp): + pass + + +class Cos(_UnaryOp): + pass + + +class Sinh(_UnaryOp): + pass + + +class Tanh(_UnaryOp): + pass + + +class Asinh(_UnaryOp): + pass + + +class Logistic(_UnaryOp): + pass + + +class NormalCdf(_UnaryOp): + pass + + +class Xexp(_UnaryOp): + pass + + +class Power(Expression): + def __init__(self, child, exponent): + self.child = child + self.exponent = exponent + self.shape = child.shape + + +# --- Restricted domain --- +# The C engine's restricted-domain Jacobian code does not correctly handle +# children with non-trivial Jacobian structure (e.g., index nodes with +# nonzero offset). These ops require the child to be a plain variable or +# a full-domain composition. + +def _check_no_index_child(child, op_name): + """Raise if the immediate child is an Index node. + + The C engine's restricted-domain atoms assume the child's Jacobian has + columns starting at offset 0. Applying them directly to an Index node + with nonzero offset produces wrong Jacobian column positions. + """ + if isinstance(child, Index): + raise ValueError( + f"{op_name} cannot be applied directly to an indexed expression. " + f"This is a known limitation of the C engine's restricted-domain " + f"Jacobian computation. As a workaround, use a separate variable " + f"for the indexed slice." 
+ ) + + +class Log(_UnaryOp): + def __init__(self, child): + _check_no_index_child(child, "log") + super().__init__(child) + + +class Tan(_UnaryOp): + def __init__(self, child): + _check_no_index_child(child, "tan") + super().__init__(child) + + +class Atanh(_UnaryOp): + def __init__(self, child): + _check_no_index_child(child, "atanh") + super().__init__(child) + + +class Entr(_UnaryOp): + def __init__(self, child): + _check_no_index_child(child, "entr") + super().__init__(child) diff --git a/sparsediffpy/_core/_nodes_other.py b/sparsediffpy/_core/_nodes_other.py new file mode 100644 index 0000000..63cceb3 --- /dev/null +++ b/sparsediffpy/_core/_nodes_other.py @@ -0,0 +1,35 @@ +"""Other expression nodes: quad_form, prod variants.""" + +from sparsediffpy._core._expression import Expression + + +class QuadForm(Expression): + """x' Q x where Q is a constant sparse matrix.""" + def __init__(self, child, Q_csr_data, Q_csr_indices, Q_csr_indptr, Q_shape): + self.child = child + self.Q_csr_data = Q_csr_data + self.Q_csr_indices = Q_csr_indices + self.Q_csr_indptr = Q_csr_indptr + self.Q_shape = Q_shape + self.shape = (1, 1) + + +class Prod(Expression): + """Product of all elements -> (1, 1).""" + def __init__(self, child): + self.child = child + self.shape = (1, 1) + + +class ProdAxisZero(Expression): + """Product along axis 0 -> (1, d2).""" + def __init__(self, child): + self.child = child + self.shape = (1, child.shape[1]) + + +class ProdAxisOne(Expression): + """Product along axis 1 -> (1, d1). C layer returns row vector.""" + def __init__(self, child): + self.child = child + self.shape = (1, child.shape[0]) diff --git a/sparsediffpy/_core/_registry.py b/sparsediffpy/_core/_registry.py new file mode 100644 index 0000000..17a5e3d --- /dev/null +++ b/sparsediffpy/_core/_registry.py @@ -0,0 +1,178 @@ +"""Node converter registry: maps expression node types to C diff engine constructors. 
+ +Each converter receives (node, children_caps) where node is the Python expression +node and children_caps are already-converted C capsules. matmul and multiply are +handled separately in _compile.py (they need param_dict for parameter support). + +Modelled after DNLP's registry.py. +""" + +import numpy as np + +from sparsediffpy import _sparsediffengine as _C +from sparsediffpy._core._constants import Constant, SparseConstant +from sparsediffpy._core._nodes_affine import ( + Add, Broadcast, DiagVec, HStack, Index, LeftMatMul, Neg, + ParamScalarMult, ParamVectorMult, Reshape, RightMatMul, Sum, Trace, + Transpose, +) +from sparsediffpy._core._nodes_bivariate import ( + MatMul, Multiply, QuadOverLin, RelEntr, +) +from sparsediffpy._core._nodes_elementwise import ( + Asinh, Atanh, Cos, Entr, Exp, Log, Logistic, NormalCdf, Power, + Sin, Sinh, Tan, Tanh, Xexp, +) +from sparsediffpy._core._nodes_other import ( + Prod, ProdAxisOne, ProdAxisZero, QuadForm, +) +from sparsediffpy._core._scope import Parameter + + +# --------------------------------------------------------------------------- +# Matmul helpers (matching DNLP's helpers.py) +# --------------------------------------------------------------------------- + +def make_sparse_left_matmul(param_node, child_cap, matrix): + """A @ f(x) with sparse constant A.""" + return _C.make_left_matmul( + param_node, child_cap, "sparse", + matrix._csr_data, matrix._csr_indices, matrix._csr_indptr, + matrix.shape[0], matrix.shape[1], + ) + + +def make_dense_left_matmul(param_node, child_cap, A_flat, m, n): + """A @ f(x) with dense constant A.""" + return _C.make_left_matmul(param_node, child_cap, "dense", A_flat, m, n) + + +def make_sparse_right_matmul(param_node, child_cap, matrix): + """f(x) @ A with sparse constant A.""" + return _C.make_right_matmul( + param_node, child_cap, "sparse", + matrix._csr_data, matrix._csr_indices, matrix._csr_indptr, + matrix.shape[0], matrix.shape[1], + ) + + +def make_dense_right_matmul(param_node, 
child_cap, A_flat, m, n): + """f(x) @ A with dense constant A.""" + return _C.make_right_matmul(param_node, child_cap, "dense", A_flat, m, n) + + +def _to_dense_row_major(matrix): + """Convert a Constant or Parameter to row-major flat data for dense matmul.""" + m, n = matrix.shape + return matrix._value_flat.reshape((m, n), order="F").flatten(order="C") + + +# --------------------------------------------------------------------------- +# Individual converters for nodes needing special handling +# --------------------------------------------------------------------------- + +def convert_hstack(node, child_caps): + return _C.make_hstack(child_caps) + + +def convert_index(node, child_caps): + return _C.make_index( + child_caps[0], node.shape[0], node.shape[1], node.flat_indices + ) + + +def convert_sum(node, child_caps): + return _C.make_sum(child_caps[0], node.axis) + + +def convert_power(node, child_caps): + return _C.make_power(child_caps[0], node.exponent) + + +def convert_reshape(node, child_caps): + return _C.make_reshape(child_caps[0], node.shape[0], node.shape[1]) + + +def convert_broadcast(node, child_caps): + return _C.make_broadcast(child_caps[0], node.shape[0], node.shape[1]) + + +def convert_quad_over_lin(node, child_caps): + return _C.make_quad_over_lin(child_caps[0], child_caps[1]) + + +def convert_rel_entr(node, child_caps): + return _C.make_rel_entr(child_caps[0], child_caps[1]) + + +def convert_quad_form(node, child_caps): + return _C.make_quad_form( + child_caps[0], + node.Q_csr_data, node.Q_csr_indices, node.Q_csr_indptr, + node.Q_shape[0], node.Q_shape[1], + ) + + +def convert_param_scalar_mult(node, child_caps): + return _C.make_param_scalar_mult(child_caps[0], child_caps[1]) + + +def convert_param_vector_mult(node, child_caps): + return _C.make_param_vector_mult(child_caps[0], child_caps[1]) + + +# --------------------------------------------------------------------------- +# Registry dict +# 
--------------------------------------------------------------------------- + +ATOM_CONVERTERS = { + # Elementwise unary (full domain) + Neg: lambda _node, caps: _C.make_neg(caps[0]), + Exp: lambda _node, caps: _C.make_exp(caps[0]), + Sin: lambda _node, caps: _C.make_sin(caps[0]), + Cos: lambda _node, caps: _C.make_cos(caps[0]), + Sinh: lambda _node, caps: _C.make_sinh(caps[0]), + Tanh: lambda _node, caps: _C.make_tanh(caps[0]), + Asinh: lambda _node, caps: _C.make_asinh(caps[0]), + Logistic: lambda _node, caps: _C.make_logistic(caps[0]), + NormalCdf: lambda _node, caps: _C.make_normal_cdf(caps[0]), + Xexp: lambda _node, caps: _C.make_xexp(caps[0]), + + # Elementwise unary (restricted domain) + Log: lambda _node, caps: _C.make_log(caps[0]), + Tan: lambda _node, caps: _C.make_tan(caps[0]), + Atanh: lambda _node, caps: _C.make_atanh(caps[0]), + Entr: lambda _node, caps: _C.make_entr(caps[0]), + + # Elementwise unary with extra args + Power: convert_power, + + # Affine unary + Transpose: lambda _node, caps: _C.make_transpose(caps[0]), + DiagVec: lambda _node, caps: _C.make_diag_vec(caps[0]), + Trace: lambda _node, caps: _C.make_trace(caps[0]), + + # Reductions + Sum: convert_sum, + Prod: lambda _node, caps: _C.make_prod(caps[0]), + ProdAxisZero: lambda _node, caps: _C.make_prod_axis_zero(caps[0]), + ProdAxisOne: lambda _node, caps: _C.make_prod_axis_one(caps[0]), + + # Shape operations + Reshape: convert_reshape, + Broadcast: convert_broadcast, + + # Binary (both variable-dependent) + Add: lambda _node, caps: _C.make_add(caps[0], caps[1]), + Multiply: lambda _node, caps: _C.make_multiply(caps[0], caps[1]), + MatMul: lambda _node, caps: _C.make_matmul(caps[0], caps[1]), + + # Bivariate + QuadOverLin: convert_quad_over_lin, + RelEntr: convert_rel_entr, + QuadForm: convert_quad_form, + + # Structural + HStack: convert_hstack, + Index: convert_index, +} diff --git a/sparsediffpy/_core/_scope.py b/sparsediffpy/_core/_scope.py new file mode 100644 index 0000000..86339e6 --- 
/dev/null +++ b/sparsediffpy/_core/_scope.py @@ -0,0 +1,133 @@ +"""Scope, Variable, and Parameter.""" + +import numpy as np + +from sparsediffpy._core._expression import Expression +from sparsediffpy._core._shapes import validate_shape + + +class Variable(Expression): + """A decision variable in the expression tree. + + Created by Scope.Variable(). Has a .value property that reads/writes + into the scope's flat value buffer. + """ + + def __init__(self, scope, var_id, shape): + self._scope = scope + self._var_id = var_id + self.shape = shape + + @property + def value(self): + size = self.shape[0] * self.shape[1] + return self._scope._flat_values[self._var_id:self._var_id + size].copy() + + @value.setter + def value(self, val): + val = np.asarray(val, dtype=np.float64).ravel() + size = self.shape[0] * self.shape[1] + if val.size != size: + raise ValueError( + f"Expected {size} elements for Variable with shape {self.shape}, " + f"got {val.size}" + ) + self._scope._flat_values[self._var_id:self._var_id + size] = val + + +class Parameter(Expression): + """An updatable parameter in the expression tree. + + Created by Scope.Parameter(). Values are stored on the parameter itself + (not in the scope's flat buffer). Updated via .value property. 
+ """ + + def __init__(self, scope, param_id, shape, value=None): + self._scope = scope + self._param_id = param_id + self.shape = shape + size = shape[0] * shape[1] + if value is not None: + self._value_flat = np.asarray(value, dtype=np.float64).ravel(order="F") + if self._value_flat.size != size: + raise ValueError( + f"Parameter value has {self._value_flat.size} elements, " + f"expected {size} for shape {shape}" + ) + else: + self._value_flat = np.zeros(size, dtype=np.float64) + + @property + def value(self): + return self._value_flat.copy() + + @value.setter + def value(self, val): + val = np.asarray(val, dtype=np.float64).ravel(order="F") + size = self.shape[0] * self.shape[1] + if val.size != size: + raise ValueError( + f"Expected {size} elements for Parameter with shape {self.shape}, " + f"got {val.size}" + ) + self._value_flat[:] = val + + +# Patch _is_param_like to recognize Parameter +# (already handled via lazy import in _expressions.py) + + +class Scope: + """Owns the variable/parameter space and flat value buffer.""" + + def __init__(self): + self._variables = [] + self._parameters = [] + self._flat_values = np.zeros(0, dtype=np.float64) + self._next_var_offset = 0 + self._next_param_offset = 0 + + def Variable(self, d1, d2): + """Create a new variable in this scope.""" + validate_shape(d1, d2) + size = d1 * d2 + var_id = self._next_var_offset + + new_flat = np.zeros(self._next_var_offset + size, dtype=np.float64) + if self._next_var_offset > 0: + new_flat[:self._next_var_offset] = self._flat_values + self._flat_values = new_flat + self._next_var_offset += size + + var = Variable(self, var_id, (d1, d2)) + self._variables.append(var) + return var + + def Parameter(self, d1, d2, value=None): + """Create a new updatable parameter in this scope.""" + validate_shape(d1, d2) + size = d1 * d2 + param_id = self._next_param_offset + self._next_param_offset += size + + param = Parameter(self, param_id, (d1, d2), value) + self._parameters.append(param) + return 
param + + def set_values(self, flat_array): + """Set all variable values at once from a flat array.""" + flat_array = np.asarray(flat_array, dtype=np.float64) + if flat_array.size != self._flat_values.size: + raise ValueError( + f"Expected flat array of size {self._flat_values.size}, " + f"got {flat_array.size}" + ) + self._flat_values[:] = flat_array + + def get_values(self): + """Return a copy of the flat value buffer.""" + return self._flat_values.copy() + + @property + def total_var_size(self): + return self._next_var_offset diff --git a/sparsediffpy/_core/_shapes.py b/sparsediffpy/_core/_shapes.py new file mode 100644 index 0000000..fae16b3 --- /dev/null +++ b/sparsediffpy/_core/_shapes.py @@ -0,0 +1,86 @@ +"""Shape validation, broadcasting, and matmul checks. + +All shapes are 2-tuples (d1, d2) matching the C layer's convention. +Column-major flat storage: flat_index = row + col * d1. +""" + + +def validate_shape(d1, d2): + """Check that d1 and d2 are positive integers.""" + if not isinstance(d1, int) or not isinstance(d2, int): + raise TypeError(f"Shape dimensions must be integers, got ({type(d1).__name__}, {type(d2).__name__})") + if d1 <= 0 or d2 <= 0: + raise ValueError(f"Shape dimensions must be positive, got ({d1}, {d2})") + + +def is_scalar(shape): + return shape == (1, 1) + + +def is_column(shape): + return shape[1] == 1 + + +def is_row(shape): + return shape[0] == 1 + + +def broadcast_shape(left_shape, right_shape): + """Compute broadcast result shape for elementwise operations. + + Returns (result_shape, left_needs_broadcast, right_needs_broadcast). + Raises ValueError if shapes are incompatible. 
+ + Rules (CVXPY/NumPy convention): + (1,1) + (m,n) -> (m,n) broadcast scalar + (m,1) + (m,n) -> (m,n) broadcast column + (1,n) + (m,n) -> (m,n) broadcast row + (m,n) + (m,n) -> (m,n) no broadcast + """ + ld1, ld2 = left_shape + rd1, rd2 = right_shape + + if left_shape == right_shape: + return left_shape, False, False + + # Broadcast each dimension independently: 1 matches anything + if ld1 == rd1: + out_d1 = ld1 + elif ld1 == 1: + out_d1 = rd1 + elif rd1 == 1: + out_d1 = ld1 + else: + raise ValueError( + f"Cannot broadcast shapes {left_shape} and {right_shape}: " + f"d1 mismatch ({ld1} vs {rd1})" + ) + + if ld2 == rd2: + out_d2 = ld2 + elif ld2 == 1: + out_d2 = rd2 + elif rd2 == 1: + out_d2 = ld2 + else: + raise ValueError( + f"Cannot broadcast shapes {left_shape} and {right_shape}: " + f"d2 mismatch ({ld2} vs {rd2})" + ) + + result = (out_d1, out_d2) + return result, left_shape != result, right_shape != result + + +def check_matmul_shapes(left_shape, right_shape): + """Validate matmul dimensions and return result shape. + + Requires left_shape[1] == right_shape[0]. + Returns (left_shape[0], right_shape[1]). 
+ """ + if left_shape[1] != right_shape[0]: + raise ValueError( + f"Matmul shape mismatch: ({left_shape[0]}, {left_shape[1]}) @ " + f"({right_shape[0]}, {right_shape[1]})" + ) + return (left_shape[0], right_shape[1]) diff --git a/tests/affine/__init__.py b/tests/affine/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/affine/test_add.py b/tests/affine/test_add.py new file mode 100644 index 0000000..bf9aa93 --- /dev/null +++ b/tests/affine/test_add.py @@ -0,0 +1,128 @@ +"""Tests for addition with all broadcast combinations.""" + +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +# --- Scalar + matrix broadcast --- + +def test_add_scalar_plus_matrix_jacobian(scope, rng): + a = scope.Variable(1, 1) + B = scope.Variable(3, 2) + f = a + B + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_add_scalar_plus_matrix_hessian(scope, rng): + a = scope.Variable(1, 1) + B = scope.Variable(3, 2) + f = a + B + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(6)) + + +# --- Column + matrix broadcast --- + +def test_add_column_plus_matrix_jacobian(scope, rng): + x = scope.Variable(3, 1) + Y = scope.Variable(3, 2) + f = x + Y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_add_column_plus_matrix_hessian(scope, rng): + x = scope.Variable(3, 1) + Y = scope.Variable(3, 2) + f = x + Y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(6)) + + +# --- Row + matrix broadcast --- + +def test_add_row_plus_matrix_jacobian(scope, rng): + r = scope.Variable(1, 2) + Y = scope.Variable(3, 2) + f = r + Y + fn = sp.compile(f) + checker = 
NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_add_row_plus_matrix_hessian(scope, rng): + r = scope.Variable(1, 2) + Y = scope.Variable(3, 2) + f = r + Y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(6)) + + +# --- Same shape (no broadcast) --- + +def test_add_same_shape_jacobian(scope, rng): + X = scope.Variable(3, 2) + Y = scope.Variable(3, 2) + f = X + Y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +# --- Scalar constant broadcast --- + +def test_add_constant_scalar_jacobian(scope, rng): + x = scope.Variable(3, 1) + f = x + 1.0 + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_add_constant_scalar_forward(scope, rng): + x = scope.Variable(3, 1) + f = x + 2.5 + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0 + 2.5) + + +# --- NumPy array constant broadcast --- + +def test_add_numpy_array_jacobian(scope, rng): + x = scope.Variable(3, 1) + b = np.array([1.0, 2.0, 3.0]) + f = b + x + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_add_numpy_array_forward(scope, rng): + x = scope.Variable(3, 1) + b = np.array([10.0, 20.0, 30.0]) + f = x + b + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0 + b) + + +# --- Column vectors --- + +def test_add_column_vectors_jacobian(scope, rng): + x = scope.Variable(4, 1) + y = scope.Variable(4, 1) + f = x + y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/affine/test_diag_vec.py b/tests/affine/test_diag_vec.py new file mode 100644 index 
0000000..db44701 --- /dev/null +++ b/tests/affine/test_diag_vec.py @@ -0,0 +1,22 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_diag_vec_jacobian(scope, rng): + x = scope.Variable(3, 1) + f = sp.diag_vec(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_diag_vec_forward(scope, rng): + x = scope.Variable(3, 1) + f = sp.diag_vec(x) + fn = sp.compile(f) + x0 = random_point(scope, rng) + result = fn.forward() + # diag_vec produces a 3x3 matrix, flattened column-major + expected = np.diag(x0).ravel(order="F") + np.testing.assert_allclose(result, expected) diff --git a/tests/affine/test_hstack.py b/tests/affine/test_hstack.py new file mode 100644 index 0000000..0c4a37f --- /dev/null +++ b/tests/affine/test_hstack.py @@ -0,0 +1,33 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_hstack_vectors_jacobian(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(3, 1) + f = sp.hstack([x, y]) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_hstack_vectors_forward(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(3, 1) + f = sp.hstack([x, y]) + fn = sp.compile(f) + x0 = random_point(scope, rng) + x_val = x.value + y_val = y.value + np.testing.assert_allclose(fn.forward(), np.concatenate([x_val, y_val])) + + +def test_hstack_three_jacobian(scope, rng): + a = scope.Variable(2, 1) + b = scope.Variable(2, 1) + c = scope.Variable(2, 1) + f = sp.hstack([a, b, c]) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/affine/test_index.py b/tests/affine/test_index.py new file mode 100644 index 0000000..e1e4dcc --- /dev/null +++ 
b/tests/affine/test_index.py @@ -0,0 +1,67 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_index_scalar(scope, rng): + x = scope.Variable(4, 1) + f = x[0] + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_index_scalar_forward(scope, rng): + x = scope.Variable(4, 1) + f = x[2] + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), [x0[2]]) + + +def test_index_slice(scope, rng): + x = scope.Variable(4, 1) + f = x[1:3] + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_index_slice_forward(scope, rng): + x = scope.Variable(4, 1) + f = x[1:3] + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0[1:3]) + + +def test_index_fancy(scope, rng): + x = scope.Variable(4, 1) + f = x[[0, 3]] + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_index_matrix_element(scope, rng): + X = scope.Variable(3, 2) + f = X[1, 0] + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_index_matrix_row_slice(scope, rng): + X = scope.Variable(3, 2) + f = X[0:2, :] + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_index_matrix_column(scope, rng): + X = scope.Variable(3, 2) + f = X[:, 1] + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/affine/test_left_matmul.py b/tests/affine/test_left_matmul.py new file mode 100644 index 0000000..3c72164 --- /dev/null +++ b/tests/affine/test_left_matmul.py @@ -0,0 +1,60 @@ +import numpy as np 
+import scipy.sparse +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_left_matmul_dense_jacobian(scope, rng): + x = scope.Variable(3, 1) + A = rng.standard_normal((4, 3)) + f = A @ x + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_left_matmul_dense_forward(scope, rng): + x = scope.Variable(3, 1) + A = rng.standard_normal((4, 3)) + f = A @ x + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), A @ x0, rtol=1e-10) + + +def test_left_matmul_sparse_jacobian(scope, rng): + x = scope.Variable(3, 1) + A = scipy.sparse.eye(3, format="csr") * 2.0 + f = A @ x + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_left_matmul_sparse_forward(scope, rng): + x = scope.Variable(3, 1) + A = scipy.sparse.eye(3, format="csr") * 3.0 + f = A @ x + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), 3.0 * x0) + + +def test_left_matmul_parameter_jacobian(scope, rng): + x = scope.Variable(3, 1) + A = scope.Parameter(4, 3, value=rng.standard_normal((4, 3))) + f = A @ x + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_left_matmul_parameter_update(scope, rng): + x = scope.Variable(3, 1) + A = scope.Parameter(3, 3, value=np.eye(3)) + f = A @ x + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0) + A.value = 2 * np.eye(3) + np.testing.assert_allclose(fn.forward(), 2 * x0) diff --git a/tests/affine/test_neg.py b/tests/affine/test_neg.py new file mode 100644 index 0000000..77ab620 --- /dev/null +++ b/tests/affine/test_neg.py @@ -0,0 +1,27 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + 
+def test_neg_vector_forward(scope, rng): + x = scope.Variable(4, 1) + f = -x + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), -x0) + + +def test_neg_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = -x + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_neg_matrix_jacobian(scope, rng): + X = scope.Variable(3, 2) + f = -X + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/affine/test_reshape.py b/tests/affine/test_reshape.py new file mode 100644 index 0000000..068ed0b --- /dev/null +++ b/tests/affine/test_reshape.py @@ -0,0 +1,19 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_reshape_jacobian(scope, rng): + x = scope.Variable(6, 1) + f = sp.reshape(x, 2, 3) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_reshape_matrix_jacobian(scope, rng): + X = scope.Variable(3, 2) + f = sp.reshape(X, 6, 1) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/affine/test_right_matmul.py b/tests/affine/test_right_matmul.py new file mode 100644 index 0000000..dff4d14 --- /dev/null +++ b/tests/affine/test_right_matmul.py @@ -0,0 +1,33 @@ +import numpy as np +import scipy.sparse +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_right_matmul_dense_jacobian(scope, rng): + x = scope.Variable(1, 3) + A = rng.standard_normal((3, 4)) + f = x @ A + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_right_matmul_dense_forward(scope, rng): + x = scope.Variable(1, 3) + A = 
rng.standard_normal((3, 4)) + f = x @ A + fn = sp.compile(f) + x0 = random_point(scope, rng) + x_mat = x0.reshape(1, 3) + expected = (x_mat @ A).ravel(order="F") + np.testing.assert_allclose(fn.forward(), expected, rtol=1e-10) + + +def test_right_matmul_sparse_jacobian(scope, rng): + x = scope.Variable(1, 3) + A = scipy.sparse.eye(3, format="csr") * 2.0 + f = x @ A + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/affine/test_scalar_mult.py b/tests/affine/test_scalar_mult.py new file mode 100644 index 0000000..ed19a92 --- /dev/null +++ b/tests/affine/test_scalar_mult.py @@ -0,0 +1,55 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_scalar_mult_constant_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = 3.0 * x + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_scalar_mult_constant_forward(scope, rng): + x = scope.Variable(3, 1) + f = 2.5 * x + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), 2.5 * x0) + + +def test_scalar_mult_parameter_jacobian(scope, rng): + x = scope.Variable(4, 1) + a = scope.Parameter(1, 1, value=np.array([[3.0]])) + f = a * x + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_scalar_mult_parameter_update(scope, rng): + x = scope.Variable(3, 1) + a = scope.Parameter(1, 1, value=np.array([[2.0]])) + f = a * x + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), 2.0 * x0) + a.value = np.array([[5.0]]) + np.testing.assert_allclose(fn.forward(), 5.0 * x0) + + +def test_scalar_mult_matrix_jacobian(scope, rng): + X = scope.Variable(3, 2) + f = 2.0 * X + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + 
checker.check_jacobian(random_point(scope, rng)) + + +def test_right_scalar_mult_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = x * 3.0 + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/affine/test_sub.py b/tests/affine/test_sub.py new file mode 100644 index 0000000..c53013d --- /dev/null +++ b/tests/affine/test_sub.py @@ -0,0 +1,39 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_sub_vectors_forward(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(3, 1) + f = x - y + fn = sp.compile(f) + x0 = random_point(scope, rng) + x_val = x.value + y_val = y.value + np.testing.assert_allclose(fn.forward(), x_val - y_val) + + +def test_sub_vectors_jacobian(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(3, 1) + f = x - y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_sub_scalar_constant(scope, rng): + x = scope.Variable(3, 1) + f = x - 1.0 + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0 - 1.0) + + +def test_rsub_constant(scope, rng): + x = scope.Variable(3, 1) + f = 1.0 - x + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), 1.0 - x0) diff --git a/tests/affine/test_sum.py b/tests/affine/test_sum.py new file mode 100644 index 0000000..a471f96 --- /dev/null +++ b/tests/affine/test_sum.py @@ -0,0 +1,35 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_sum_all_jacobian(scope, rng): + x = scope.Variable(3, 2) + f = sp.sum(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_sum_axis0_jacobian(scope, rng): + x = scope.Variable(3, 2) + f = 
sp.sum(x, axis=0) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_sum_axis1_jacobian(scope, rng): + x = scope.Variable(3, 2) + f = sp.sum(x, axis=1) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_sum_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.sum(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/affine/test_trace.py b/tests/affine/test_trace.py new file mode 100644 index 0000000..0f723b5 --- /dev/null +++ b/tests/affine/test_trace.py @@ -0,0 +1,20 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_trace_jacobian(scope, rng): + X = scope.Variable(3, 3) + f = sp.trace(X) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_trace_hessian(scope, rng): + X = scope.Variable(3, 3) + f = sp.trace(X) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, np.array([1.0])) diff --git a/tests/affine/test_transpose.py b/tests/affine/test_transpose.py new file mode 100644 index 0000000..e7d4700 --- /dev/null +++ b/tests/affine/test_transpose.py @@ -0,0 +1,19 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_transpose_matrix_jacobian(scope, rng): + X = scope.Variable(3, 2) + f = X.T + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_transpose_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = x.T + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + 
checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/affine/test_vector_mult.py b/tests/affine/test_vector_mult.py new file mode 100644 index 0000000..abdca11 --- /dev/null +++ b/tests/affine/test_vector_mult.py @@ -0,0 +1,41 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_vector_mult_constant_jacobian(scope, rng): + x = scope.Variable(3, 1) + c = np.array([1.0, 2.0, 3.0]) + f = c * x + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_vector_mult_constant_forward(scope, rng): + x = scope.Variable(3, 1) + c = np.array([1.0, 2.0, 3.0]) + f = c * x + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), c * x0) + + +def test_vector_mult_parameter_jacobian(scope, rng): + x = scope.Variable(3, 1) + a = scope.Parameter(3, 1, value=np.array([1.0, 2.0, 3.0])) + f = a * x + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_vector_mult_parameter_update(scope, rng): + x = scope.Variable(3, 1) + a = scope.Parameter(3, 1, value=np.array([1.0, 1.0, 1.0])) + f = a * x + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0) + a.value = np.array([2.0, 3.0, 4.0]) + np.testing.assert_allclose(fn.forward(), np.array([2.0, 3.0, 4.0]) * x0) diff --git a/tests/affine/test_vstack.py b/tests/affine/test_vstack.py new file mode 100644 index 0000000..d75bdb9 --- /dev/null +++ b/tests/affine/test_vstack.py @@ -0,0 +1,32 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_vstack_vectors_jacobian(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(2, 1) + f = sp.vstack([x, y]) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + 
checker.check_jacobian(random_point(scope, rng)) + + +def test_vstack_vectors_forward(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(2, 1) + f = sp.vstack([x, y]) + fn = sp.compile(f) + x0 = random_point(scope, rng) + x_val = x.value + y_val = y.value + np.testing.assert_allclose(fn.forward(), np.concatenate([x_val, y_val])) + + +def test_vstack_matrices_jacobian(scope, rng): + X = scope.Variable(2, 3) + Y = scope.Variable(2, 3) + f = sp.vstack([X, Y]) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/bivariate_full_dom/__init__.py b/tests/bivariate_full_dom/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/bivariate_full_dom/test_matmul.py b/tests/bivariate_full_dom/test_matmul.py new file mode 100644 index 0000000..4d6e62f --- /dev/null +++ b/tests/bivariate_full_dom/test_matmul.py @@ -0,0 +1,32 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_matmul_jacobian(scope, rng): + X = scope.Variable(2, 3) + Y = scope.Variable(3, 2) + f = X @ Y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_matmul_hessian(scope, rng): + X = scope.Variable(2, 3) + Y = scope.Variable(3, 2) + f = X @ Y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) + + +def test_matmul_vec_jacobian(scope, rng): + """Row vector @ column vector = scalar.""" + x = scope.Variable(1, 3) + y = scope.Variable(3, 1) + f = x @ y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/bivariate_full_dom/test_multiply.py b/tests/bivariate_full_dom/test_multiply.py new file mode 100644 index 0000000..b9cd0a9 --- /dev/null +++ 
b/tests/bivariate_full_dom/test_multiply.py @@ -0,0 +1,42 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_multiply_vectors_jacobian(scope, rng): + x = scope.Variable(4, 1) + y = scope.Variable(4, 1) + f = x * y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_multiply_vectors_hessian(scope, rng): + x = scope.Variable(4, 1) + y = scope.Variable(4, 1) + f = x * y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) + + +def test_multiply_vectors_forward(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(3, 1) + f = x * y + fn = sp.compile(f) + x0 = random_point(scope, rng) + x_val = x.value + y_val = y.value + np.testing.assert_allclose(fn.forward(), x_val * y_val) + + +def test_multiply_matrices_jacobian(scope, rng): + X = scope.Variable(3, 2) + Y = scope.Variable(3, 2) + f = X * Y + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/complicated/__init__.py b/tests/complicated/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/complicated/test_compositions.py b/tests/complicated/test_compositions.py new file mode 100644 index 0000000..666dc3e --- /dev/null +++ b/tests/complicated/test_compositions.py @@ -0,0 +1,312 @@ +"""Complicated composition tests. + +Each test builds a deep or wide expression involving many atoms, then +verifies forward (against manual NumPy), Jacobian, and Hessian. +""" + +import numpy as np +import pytest +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point, random_positive_point + + +# ----------------------------------------------------------------------- +# 1. 
Affine chain: A @ x + b +# ----------------------------------------------------------------------- + +class TestAffineChain: + def _build(self, scope, rng): + x = scope.Variable(3, 1) + A = rng.standard_normal((3, 3)) + b = rng.standard_normal(3) + f = A @ x + b + fn = sp.compile(f) + return fn, x, A, b + + def test_forward(self, scope, rng): + fn, x, A, b = self._build(scope, rng) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), A @ x0 + b, rtol=1e-10) + + def test_jacobian(self, scope, rng): + fn, x, A, b = self._build(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_hessian(self, scope, rng): + fn, x, A, b = self._build(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(3)) + + +# ----------------------------------------------------------------------- +# 2. Nonlinear composition: exp(A @ x) + sin(x) +# ----------------------------------------------------------------------- + +class TestNonlinearComposition: + def _build(self, scope, rng): + x = scope.Variable(3, 1) + A = rng.standard_normal((3, 3)) + f = sp.exp(A @ x) + sp.sin(x) + fn = sp.compile(f) + return fn, x, A + + def test_forward(self, scope, rng): + fn, x, A = self._build(scope, rng) + x0 = random_point(scope, rng, low=-0.5, high=0.5) + np.testing.assert_allclose( + fn.forward(), np.exp(A @ x0) + np.sin(x0), rtol=1e-10 + ) + + def test_jacobian(self, scope, rng): + fn, x, A = self._build(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng, low=-0.5, high=0.5)) + + def test_hessian(self, scope, rng): + fn, x, A = self._build(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng, low=-0.5, high=0.5) + checker.check_hessian(x0, rng.standard_normal(3)) + + +# 
----------------------------------------------------------------------- +# 3. Matrix expression: sin(X) * Y and X.T @ Y (tested separately) +# ----------------------------------------------------------------------- + +class TestMatrixExpression: + def _build_elementwise(self, scope, rng): + X = scope.Variable(3, 2) + Y = scope.Variable(3, 2) + f = sp.sin(X) * Y + fn = sp.compile(f) + return fn, X, Y + + def test_elementwise_forward(self, scope, rng): + fn, X, Y = self._build_elementwise(scope, rng) + x0 = random_point(scope, rng) + X_val = X.value.reshape(3, 2, order="F") + Y_val = Y.value.reshape(3, 2, order="F") + expected = (np.sin(X_val) * Y_val).ravel(order="F") + np.testing.assert_allclose(fn.forward(), expected, rtol=1e-10) + + def test_elementwise_jacobian(self, scope, rng): + fn, X, Y = self._build_elementwise(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_elementwise_hessian(self, scope, rng): + fn, X, Y = self._build_elementwise(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(6)) + + def _build_matmul(self, scope, rng): + X = scope.Variable(3, 2) + Y = scope.Variable(3, 2) + f = X.T @ Y # (2,3) @ (3,2) = (2,2) + fn = sp.compile(f) + return fn, X, Y + + def test_matmul_jacobian(self, scope, rng): + fn, X, Y = self._build_matmul(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_matmul_hessian(self, scope, rng): + fn, X, Y = self._build_matmul(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) + + +# ----------------------------------------------------------------------- +# 4. 
Broadcast heavy: a * X + r + c +# a: scalar (1,1), X: matrix (3,2), r: row (1,2), c: column (3,1) +# ----------------------------------------------------------------------- + +class TestBroadcastHeavy: + def _build(self, scope, rng): + a = scope.Variable(1, 1) + X = scope.Variable(3, 2) + r = scope.Variable(1, 2) + c = scope.Variable(3, 1) + f = a * X + r + c + fn = sp.compile(f) + return fn, a, X, r, c + + def test_forward(self, scope, rng): + fn, a, X, r, c = self._build(scope, rng) + x0 = random_point(scope, rng) + a_val = a.value[0] + X_val = X.value.reshape(3, 2, order="F") + r_val = r.value.reshape(1, 2, order="F") + c_val = c.value.reshape(3, 1, order="F") + expected = (a_val * X_val + r_val + c_val).ravel(order="F") + np.testing.assert_allclose(fn.forward(), expected, rtol=1e-10) + + def test_jacobian(self, scope, rng): + fn, *_ = self._build(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_hessian(self, scope, rng): + fn, *_ = self._build(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(6)) + + +# ----------------------------------------------------------------------- +# 5. Index into composition: sum(exp(x[0:3]) + log(x[3:6])) +# ----------------------------------------------------------------------- + +class TestIndexIntoComposition: + """Index + elementwise composition tests. + + Full-domain ops (exp, sin, etc.) work on indexed expressions. + Restricted-domain ops (log, tan, atanh, entr) raise an error when + applied directly to an index node (C engine limitation). 
+ """ + def test_full_domain_on_index(self, scope, rng): + """exp on indexed variable works correctly.""" + x = scope.Variable(6, 1) + f = sp.exp(x[3:6]) + fn = sp.compile(f) + x0 = random_positive_point(scope, rng) + np.testing.assert_allclose(fn.forward(), np.exp(x0[3:6])) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(x0) + + def test_restricted_domain_on_index_raises(self, scope, rng): + """log on indexed variable raises ValueError.""" + x = scope.Variable(6, 1) + with pytest.raises(ValueError, match="log cannot be applied directly"): + sp.log(x[3:6]) + + def test_workaround_separate_variables(self, scope, rng): + """Use separate variables as workaround for restricted-domain + index.""" + a = scope.Variable(3, 1) + b = scope.Variable(3, 1) + f = sp.sum(sp.exp(a) + sp.log(b)) + fn = sp.compile(f) + x0 = random_positive_point(scope, rng) + a_val = a.value + b_val = b.value + expected = np.sum(np.exp(a_val) + np.log(b_val)) + np.testing.assert_allclose(fn.forward(), [expected], rtol=1e-10) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(x0) + checker.check_hessian(x0, np.array([1.0])) + + +# ----------------------------------------------------------------------- +# 6. 
Hstack mixed: hstack([sin(x), A @ x, y]) +# ----------------------------------------------------------------------- + +class TestHstackMixed: + def _build(self, scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(3, 1) + A = rng.standard_normal((3, 3)) + f = sp.hstack([sp.sin(x), A @ x, y]) + fn = sp.compile(f) + return fn, x, y, A + + def test_forward(self, scope, rng): + fn, x, y, A = self._build(scope, rng) + x0 = random_point(scope, rng) + x_val = x.value + y_val = y.value + expected = np.concatenate([np.sin(x_val), A @ x_val, y_val]) + np.testing.assert_allclose(fn.forward(), expected, rtol=1e-10) + + def test_jacobian(self, scope, rng): + fn, *_ = self._build(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_hessian(self, scope, rng): + fn, *_ = self._build(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(9)) + + +# ----------------------------------------------------------------------- +# 7. 
Multi-compile shared scope +# ----------------------------------------------------------------------- + +class TestMultiCompileSharedScope: + def test_two_expressions_same_scope(self, scope, rng): + x = scope.Variable(3, 1) + f = sp.sin(x) + g = sp.exp(x) + fn_f = sp.compile(f) + fn_g = sp.compile(g) + + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn_f.forward(), np.sin(x0), rtol=1e-10) + np.testing.assert_allclose(fn_g.forward(), np.exp(x0), rtol=1e-10) + + # Jacobians are independent + J_f = fn_f.jacobian().toarray() + J_g = fn_g.jacobian().toarray() + np.testing.assert_allclose(J_f, np.diag(np.cos(x0)), rtol=1e-10) + np.testing.assert_allclose(J_g, np.diag(np.exp(x0)), rtol=1e-10) + + def test_shared_scope_different_variables(self, scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(2, 1) + f = sp.sin(x) + g = sp.exp(y) + fn_f = sp.compile(f) + fn_g = sp.compile(g) + + x0 = random_point(scope, rng) + x_val = x.value + y_val = y.value + np.testing.assert_allclose(fn_f.forward(), np.sin(x_val), rtol=1e-10) + np.testing.assert_allclose(fn_g.forward(), np.exp(y_val), rtol=1e-10) + + # f's Jacobian is 3x5 (3 outputs, 5 total vars), g's is 2x5 + J_f = fn_f.jacobian().toarray() + J_g = fn_g.jacobian().toarray() + assert J_f.shape == (3, 5) + assert J_g.shape == (2, 5) + # f depends on x (cols 0-2), not y (cols 3-4) + np.testing.assert_allclose(J_f[:, 3:], 0.0) + # g depends on y (cols 3-4), not x (cols 0-2) + np.testing.assert_allclose(J_g[:, 0:3], 0.0) + + +# ----------------------------------------------------------------------- +# 8. 
Deep chain: exp(sin(tanh(A @ x + b))) +# ----------------------------------------------------------------------- + +class TestDeepChain: + def _build(self, scope, rng): + x = scope.Variable(3, 1) + A = rng.standard_normal((3, 3)) * 0.5 # scale down to avoid exp overflow + b = rng.standard_normal(3) * 0.1 + f = sp.exp(sp.sin(sp.tanh(A @ x + b))) + fn = sp.compile(f) + return fn, x, A, b + + def test_forward(self, scope, rng): + fn, x, A, b = self._build(scope, rng) + x0 = random_point(scope, rng, low=-0.5, high=0.5) + expected = np.exp(np.sin(np.tanh(A @ x0 + b))) + np.testing.assert_allclose(fn.forward(), expected, rtol=1e-10) + + def test_jacobian(self, scope, rng): + fn, *_ = self._build(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng, low=-0.5, high=0.5)) + + def test_hessian(self, scope, rng): + fn, *_ = self._build(scope, rng) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng, low=-0.5, high=0.5) + checker.check_hessian(x0, rng.standard_normal(3)) diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..418baad --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,13 @@ +import pytest +import numpy as np +import sparsediffpy as sp + + +@pytest.fixture +def scope(): + return sp.Scope() + + +@pytest.fixture +def rng(): + return np.random.default_rng(42) diff --git a/tests/elementwise_full_dom/__init__.py b/tests/elementwise_full_dom/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/elementwise_full_dom/test_asinh.py b/tests/elementwise_full_dom/test_asinh.py new file mode 100644 index 0000000..076f506 --- /dev/null +++ b/tests/elementwise_full_dom/test_asinh.py @@ -0,0 +1,20 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_asinh_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.asinh(x) + fn = sp.compile(f) + checker = 
NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_asinh_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.asinh(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) diff --git a/tests/elementwise_full_dom/test_cos.py b/tests/elementwise_full_dom/test_cos.py new file mode 100644 index 0000000..85e2fbc --- /dev/null +++ b/tests/elementwise_full_dom/test_cos.py @@ -0,0 +1,28 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_cos_vector_forward(scope, rng): + x = scope.Variable(4, 1) + f = sp.cos(x) + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), np.cos(x0)) + + +def test_cos_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.cos(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_cos_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.cos(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) diff --git a/tests/elementwise_full_dom/test_exp.py b/tests/elementwise_full_dom/test_exp.py new file mode 100644 index 0000000..fe3d8e4 --- /dev/null +++ b/tests/elementwise_full_dom/test_exp.py @@ -0,0 +1,44 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_exp_vector_forward(scope, rng): + x = scope.Variable(4, 1) + f = sp.exp(x) + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), np.exp(x0)) + + +def test_exp_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.exp(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) 
+ checker.check_jacobian(random_point(scope, rng)) + + +def test_exp_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.exp(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) + + +def test_exp_matrix_jacobian(scope, rng): + X = scope.Variable(3, 2) + f = sp.exp(X) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_exp_scalar_jacobian(scope, rng): + x = scope.Variable(1, 1) + f = sp.exp(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/elementwise_full_dom/test_logistic.py b/tests/elementwise_full_dom/test_logistic.py new file mode 100644 index 0000000..3e8d6b1 --- /dev/null +++ b/tests/elementwise_full_dom/test_logistic.py @@ -0,0 +1,30 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_logistic_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.logistic(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_logistic_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.logistic(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) + + +def test_logistic_forward(scope, rng): + """The C logistic function is the softplus: log(1 + exp(x)).""" + x = scope.Variable(3, 1) + f = sp.logistic(x) + fn = sp.compile(f) + x0 = random_point(scope, rng) + expected = np.log(1.0 + np.exp(x0)) + np.testing.assert_allclose(fn.forward(), expected) diff --git a/tests/elementwise_full_dom/test_normal_cdf.py b/tests/elementwise_full_dom/test_normal_cdf.py new file mode 100644 index 0000000..80f31c1 --- 
/dev/null +++ b/tests/elementwise_full_dom/test_normal_cdf.py @@ -0,0 +1,29 @@ +import numpy as np +from scipy.stats import norm +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_normal_cdf_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.normal_cdf(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_normal_cdf_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.normal_cdf(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) + + +def test_normal_cdf_forward(scope, rng): + x = scope.Variable(3, 1) + f = sp.normal_cdf(x) + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), norm.cdf(x0), rtol=1e-6) diff --git a/tests/elementwise_full_dom/test_power.py b/tests/elementwise_full_dom/test_power.py new file mode 100644 index 0000000..394bb3e --- /dev/null +++ b/tests/elementwise_full_dom/test_power.py @@ -0,0 +1,45 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_positive_point + + +def test_power_squared_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = x ** 2 + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_positive_point(scope, rng)) + + +def test_power_squared_hessian(scope, rng): + x = scope.Variable(4, 1) + f = x ** 2 + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_positive_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) + + +def test_power_cubed_jacobian(scope, rng): + x = scope.Variable(3, 1) + f = x ** 3 + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_positive_point(scope, rng)) + + +def test_power_half_jacobian(scope, 
rng): + x = scope.Variable(3, 1) + f = sp.power(x, 0.5) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_positive_point(scope, rng)) + + +def test_power_half_hessian(scope, rng): + x = scope.Variable(3, 1) + f = sp.power(x, 0.5) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_positive_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(3)) diff --git a/tests/elementwise_full_dom/test_sin.py b/tests/elementwise_full_dom/test_sin.py new file mode 100644 index 0000000..0f4abfd --- /dev/null +++ b/tests/elementwise_full_dom/test_sin.py @@ -0,0 +1,36 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_sin_vector_forward(scope, rng): + x = scope.Variable(4, 1) + f = sp.sin(x) + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), np.sin(x0)) + + +def test_sin_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.sin(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_sin_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.sin(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) + + +def test_sin_matrix_jacobian(scope, rng): + X = scope.Variable(3, 2) + f = sp.sin(X) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) diff --git a/tests/elementwise_full_dom/test_sinh.py b/tests/elementwise_full_dom/test_sinh.py new file mode 100644 index 0000000..674e1e1 --- /dev/null +++ b/tests/elementwise_full_dom/test_sinh.py @@ -0,0 +1,20 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def 
test_sinh_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.sinh(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_sinh_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.sinh(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) diff --git a/tests/elementwise_full_dom/test_tanh.py b/tests/elementwise_full_dom/test_tanh.py new file mode 100644 index 0000000..f0201fd --- /dev/null +++ b/tests/elementwise_full_dom/test_tanh.py @@ -0,0 +1,20 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_tanh_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.tanh(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_tanh_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.tanh(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) diff --git a/tests/elementwise_full_dom/test_xexp.py b/tests/elementwise_full_dom/test_xexp.py new file mode 100644 index 0000000..8065050 --- /dev/null +++ b/tests/elementwise_full_dom/test_xexp.py @@ -0,0 +1,28 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_xexp_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.xexp(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_xexp_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.xexp(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + 
checker.check_hessian(x0, rng.standard_normal(4)) + + +def test_xexp_forward(scope, rng): + x = scope.Variable(3, 1) + f = sp.xexp(x) + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0 * np.exp(x0)) diff --git a/tests/elementwise_restricted_dom/__init__.py b/tests/elementwise_restricted_dom/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/elementwise_restricted_dom/test_atanh.py b/tests/elementwise_restricted_dom/test_atanh.py new file mode 100644 index 0000000..19b5503 --- /dev/null +++ b/tests/elementwise_restricted_dom/test_atanh.py @@ -0,0 +1,21 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_atanh_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.atanh(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + # Domain: (-1, 1) + checker.check_jacobian(random_point(scope, rng, low=-0.8, high=0.8)) + + +def test_atanh_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.atanh(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng, low=-0.8, high=0.8) + checker.check_hessian(x0, rng.standard_normal(4)) diff --git a/tests/elementwise_restricted_dom/test_entr.py b/tests/elementwise_restricted_dom/test_entr.py new file mode 100644 index 0000000..96c28f2 --- /dev/null +++ b/tests/elementwise_restricted_dom/test_entr.py @@ -0,0 +1,28 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_positive_point + + +def test_entr_vector_forward(scope, rng): + x = scope.Variable(4, 1) + f = sp.entr(x) + fn = sp.compile(f) + x0 = random_positive_point(scope, rng) + np.testing.assert_allclose(fn.forward(), -x0 * np.log(x0)) + + +def test_entr_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.entr(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, 
scope) + checker.check_jacobian(random_positive_point(scope, rng)) + + +def test_entr_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.entr(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_positive_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) diff --git a/tests/elementwise_restricted_dom/test_log.py b/tests/elementwise_restricted_dom/test_log.py new file mode 100644 index 0000000..2e3b4b0 --- /dev/null +++ b/tests/elementwise_restricted_dom/test_log.py @@ -0,0 +1,36 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_positive_point + + +def test_log_vector_forward(scope, rng): + x = scope.Variable(4, 1) + f = sp.log(x) + fn = sp.compile(f) + x0 = random_positive_point(scope, rng) + np.testing.assert_allclose(fn.forward(), np.log(x0)) + + +def test_log_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = sp.log(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_positive_point(scope, rng)) + + +def test_log_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.log(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_positive_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) + + +def test_log_matrix_jacobian(scope, rng): + X = scope.Variable(3, 2) + f = sp.log(X) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_positive_point(scope, rng)) diff --git a/tests/elementwise_restricted_dom/test_tan.py b/tests/elementwise_restricted_dom/test_tan.py new file mode 100644 index 0000000..b270d92 --- /dev/null +++ b/tests/elementwise_restricted_dom/test_tan.py @@ -0,0 +1,21 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_tan_vector_jacobian(scope, rng): + x = scope.Variable(4, 1) + f = 
sp.tan(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + # Use small values to stay away from poles at pi/2 + checker.check_jacobian(random_point(scope, rng, low=-0.5, high=0.5)) + + +def test_tan_vector_hessian(scope, rng): + x = scope.Variable(4, 1) + f = sp.tan(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng, low=-0.5, high=0.5) + checker.check_hessian(x0, rng.standard_normal(4)) diff --git a/tests/other/__init__.py b/tests/other/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/other/test_prod.py b/tests/other/test_prod.py new file mode 100644 index 0000000..679b02d --- /dev/null +++ b/tests/other/test_prod.py @@ -0,0 +1,44 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_positive_point + + +def test_prod_all_jacobian(scope, rng): + x = scope.Variable(3, 1) + f = sp.prod(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_positive_point(scope, rng)) + + +def test_prod_all_hessian(scope, rng): + x = scope.Variable(3, 1) + f = sp.prod(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_positive_point(scope, rng) + checker.check_hessian(x0, np.array([1.0])) + + +def test_prod_all_forward(scope, rng): + x = scope.Variable(3, 1) + f = sp.prod(x) + fn = sp.compile(f) + x0 = random_positive_point(scope, rng) + np.testing.assert_allclose(fn.forward(), [np.prod(x0)]) + + +def test_prod_axis_zero_jacobian(scope, rng): + X = scope.Variable(3, 2) + f = sp.prod(X, axis=0) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_positive_point(scope, rng)) + + +def test_prod_axis_one_jacobian(scope, rng): + X = scope.Variable(3, 2) + f = sp.prod(X, axis=1) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + 
checker.check_jacobian(random_positive_point(scope, rng)) diff --git a/tests/other/test_quad_form.py b/tests/other/test_quad_form.py new file mode 100644 index 0000000..be5729f --- /dev/null +++ b/tests/other/test_quad_form.py @@ -0,0 +1,43 @@ +import numpy as np +import scipy.sparse +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +def test_quad_form_identity_jacobian(scope, rng): + x = scope.Variable(3, 1) + Q = scipy.sparse.eye(3, format="csr") + f = sp.quad_form(x, Q) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_quad_form_identity_hessian(scope, rng): + x = scope.Variable(3, 1) + Q = scipy.sparse.eye(3, format="csr") + f = sp.quad_form(x, Q) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, np.array([1.0])) + + +def test_quad_form_dense_jacobian(scope, rng): + x = scope.Variable(3, 1) + Q = rng.standard_normal((3, 3)) + Q = Q.T @ Q # make positive definite + f = sp.quad_form(x, Q) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +def test_quad_form_forward(scope, rng): + x = scope.Variable(3, 1) + Q = np.eye(3) * 2.0 + f = sp.quad_form(x, Q) + fn = sp.compile(f) + x0 = random_point(scope, rng) + expected = x0 @ (2.0 * x0) + np.testing.assert_allclose(fn.forward(), [expected], rtol=1e-10) diff --git a/tests/other/test_quad_over_lin.py b/tests/other/test_quad_over_lin.py new file mode 100644 index 0000000..b78672b --- /dev/null +++ b/tests/other/test_quad_over_lin.py @@ -0,0 +1,34 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_positive_point + + +def test_quad_over_lin_jacobian(scope, rng): + x = scope.Variable(3, 1) + z = scope.Variable(1, 1) + f = sp.quad_over_lin(x, z) + fn = sp.compile(f) + checker = 
NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_positive_point(scope, rng)) + + +def test_quad_over_lin_hessian(scope, rng): + x = scope.Variable(3, 1) + z = scope.Variable(1, 1) + f = sp.quad_over_lin(x, z) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_positive_point(scope, rng) + checker.check_hessian(x0, np.array([1.0])) + + +def test_quad_over_lin_forward(scope, rng): + x = scope.Variable(3, 1) + z = scope.Variable(1, 1) + f = sp.quad_over_lin(x, z) + fn = sp.compile(f) + x0 = random_positive_point(scope, rng) + x_val = x.value + z_val = z.value[0] + expected = np.sum(x_val ** 2) / z_val + np.testing.assert_allclose(fn.forward(), [expected], rtol=1e-10) diff --git a/tests/other/test_rel_entr.py b/tests/other/test_rel_entr.py new file mode 100644 index 0000000..c2dc531 --- /dev/null +++ b/tests/other/test_rel_entr.py @@ -0,0 +1,34 @@ +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_positive_point + + +def test_rel_entr_jacobian(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(3, 1) + f = sp.rel_entr(x, y) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_positive_point(scope, rng)) + + +def test_rel_entr_hessian(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(3, 1) + f = sp.rel_entr(x, y) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_positive_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(3)) + + +def test_rel_entr_forward(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(3, 1) + f = sp.rel_entr(x, y) + fn = sp.compile(f) + x0 = random_positive_point(scope, rng) + x_val = x.value + y_val = y.value + expected = x_val * np.log(x_val / y_val) + np.testing.assert_allclose(fn.forward(), expected) diff --git a/tests/test_misc.py b/tests/test_misc.py new file mode 100644 index 0000000..89814bb --- /dev/null 
+++ b/tests/test_misc.py @@ -0,0 +1,366 @@ +"""Miscellaneous tests: hessians for affine atoms, re-evaluation, negative indexing, +parameter jacobian after update, x.T@x, sub with broadcast, scope roundtrip, +compile twice, degenerate cases.""" + +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point, random_positive_point + + +# --------------------------------------------------------------------------- +# Hessian for affine atoms (should be zero, but verify through compositions) +# --------------------------------------------------------------------------- + +class TestAffineHessians: + def test_neg_hessian_is_zero(self, scope, rng): + x = scope.Variable(3, 1) + f = -x + fn = sp.compile(f) + x0 = random_point(scope, rng) + H = fn.hessian(rng.standard_normal(3)) + np.testing.assert_allclose(H.toarray(), np.zeros((3, 3)), atol=1e-14) + + def test_add_hessian_is_zero(self, scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(3, 1) + f = x + y + fn = sp.compile(f) + x0 = random_point(scope, rng) + H = fn.hessian(rng.standard_normal(3)) + np.testing.assert_allclose(H.toarray(), np.zeros((6, 6)), atol=1e-14) + + def test_hstack_hessian_is_zero(self, scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(3, 1) + f = sp.hstack([x, y]) + fn = sp.compile(f) + x0 = random_point(scope, rng) + H = fn.hessian(rng.standard_normal(6)) + np.testing.assert_allclose(H.toarray(), np.zeros((6, 6)), atol=1e-14) + + def test_index_hessian_is_zero(self, scope, rng): + x = scope.Variable(4, 1) + f = x[1:3] + fn = sp.compile(f) + x0 = random_point(scope, rng) + H = fn.hessian(rng.standard_normal(2)) + np.testing.assert_allclose(H.toarray(), np.zeros((4, 4)), atol=1e-14) + + def test_sum_hessian_is_zero(self, scope, rng): + x = scope.Variable(3, 2) + f = sp.sum(x) + fn = sp.compile(f) + x0 = random_point(scope, rng) + H = fn.hessian(np.array([1.0])) + np.testing.assert_allclose(H.toarray(), np.zeros((6, 6)), atol=1e-14) 
+ + def test_sin_of_sum_hessian_nonzero(self, scope, rng): + """sin(sum(x)) — affine feeding into nonlinear produces nonzero Hessian.""" + x = scope.Variable(3, 1) + f = sp.sin(sp.sum(x)) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, np.array([1.0])) + + +# --------------------------------------------------------------------------- +# Re-evaluation after value change +# --------------------------------------------------------------------------- + +class TestReEvaluation: + def test_forward_updates_with_new_values(self, scope, rng): + x = scope.Variable(3, 1) + f = sp.sin(x) + fn = sp.compile(f) + + x.value = np.array([0.0, 0.0, 0.0]) + np.testing.assert_allclose(fn.forward(), np.sin([0, 0, 0])) + + x.value = np.array([1.0, 2.0, 3.0]) + np.testing.assert_allclose(fn.forward(), np.sin([1, 2, 3])) + + def test_jacobian_updates_with_new_values(self, scope, rng): + x = scope.Variable(3, 1) + f = sp.sin(x) + fn = sp.compile(f) + + x.value = np.array([0.0, 0.0, 0.0]) + J1 = fn.jacobian().toarray() + np.testing.assert_allclose(np.diag(J1), np.cos([0, 0, 0])) + + x.value = np.array([1.0, 2.0, 3.0]) + J2 = fn.jacobian().toarray() + np.testing.assert_allclose(np.diag(J2), np.cos([1, 2, 3])) + + def test_hessian_updates_with_new_values(self, scope, rng): + x = scope.Variable(3, 1) + f = sp.sin(x) + fn = sp.compile(f) + w = np.ones(3) + + x.value = np.array([0.0, 0.0, 0.0]) + H1 = fn.hessian(w).toarray() + np.testing.assert_allclose(np.diag(H1), -np.sin([0, 0, 0]), atol=1e-14) + + x.value = np.array([1.0, 2.0, 3.0]) + H2 = fn.hessian(w).toarray() + np.testing.assert_allclose(np.diag(H2), -np.sin([1, 2, 3])) + + +# --------------------------------------------------------------------------- +# Negative indexing +# --------------------------------------------------------------------------- + +class TestNegativeIndexing: + def test_negative_scalar_index(self, scope, rng): + x = scope.Variable(4, 1) 
+ f = x[-1] + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), [x0[-1]]) + + def test_negative_scalar_index_jacobian(self, scope, rng): + x = scope.Variable(4, 1) + f = x[-1] + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_negative_slice(self, scope, rng): + x = scope.Variable(4, 1) + f = x[-2:] + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0[-2:]) + + def test_negative_fancy(self, scope, rng): + x = scope.Variable(4, 1) + f = x[[-1, -3]] + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0[[-1, -3]]) + + +# --------------------------------------------------------------------------- +# Parameter Jacobian after update +# --------------------------------------------------------------------------- + +class TestParameterJacobianAfterUpdate: + def test_left_matmul_jacobian_after_update(self, scope, rng): + x = scope.Variable(3, 1) + A = scope.Parameter(3, 3, value=np.eye(3)) + f = A @ x + fn = sp.compile(f) + + x0 = random_point(scope, rng) + J1 = fn.jacobian().toarray() + np.testing.assert_allclose(J1, np.eye(3), atol=1e-14) + + A.value = 2 * np.eye(3) + J2 = fn.jacobian().toarray() + np.testing.assert_allclose(J2, 2 * np.eye(3), atol=1e-14) + + def test_scalar_mult_jacobian_after_update(self, scope, rng): + x = scope.Variable(3, 1) + a = scope.Parameter(1, 1, value=np.array([[3.0]])) + f = a * x + fn = sp.compile(f) + + x0 = random_point(scope, rng) + J1 = fn.jacobian().toarray() + np.testing.assert_allclose(J1, 3.0 * np.eye(3), atol=1e-14) + + a.value = np.array([[7.0]]) + J2 = fn.jacobian().toarray() + np.testing.assert_allclose(J2, 7.0 * np.eye(3), atol=1e-14) + + +# --------------------------------------------------------------------------- +# x.T @ x pattern +# --------------------------------------------------------------------------- + 
+class TestTransposeMatmul: + def test_xT_x_forward(self, scope, rng): + x = scope.Variable(3, 1) + f = x.T @ x + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), [x0 @ x0], rtol=1e-10) + + def test_xT_x_jacobian(self, scope, rng): + x = scope.Variable(3, 1) + f = x.T @ x + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_xT_x_hessian(self, scope, rng): + x = scope.Variable(3, 1) + f = x.T @ x + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, np.array([1.0])) + + def test_xT_A_x(self, scope, rng): + """x.T @ A @ x where A is a constant matrix.""" + x = scope.Variable(3, 1) + A = rng.standard_normal((3, 3)) + A = A + A.T # symmetric + f = x.T @ (A @ x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_jacobian(x0) + checker.check_hessian(x0, np.array([1.0])) + + +# --------------------------------------------------------------------------- +# Subtraction with broadcasting +# --------------------------------------------------------------------------- + +class TestSubBroadcast: + def test_sub_scalar_broadcast(self, scope, rng): + X = scope.Variable(3, 2) + a = scope.Variable(1, 1) + f = X - a + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_sub_column_broadcast(self, scope, rng): + X = scope.Variable(3, 2) + c = scope.Variable(3, 1) + f = X - c + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_rsub_broadcast(self, scope, rng): + x = scope.Variable(3, 1) + f = np.array([10.0, 20.0, 30.0]) - x + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose( + fn.forward(), np.array([10.0, 20.0, 30.0]) - x0 + ) 
+ + +# --------------------------------------------------------------------------- +# Scope set_values / get_values roundtrip +# --------------------------------------------------------------------------- + +class TestScopeRoundtrip: + def test_set_get_roundtrip(self, scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(2, 1) + vals = np.array([1.0, 2.0, 3.0, 4.0, 5.0]) + scope.set_values(vals) + np.testing.assert_allclose(scope.get_values(), vals) + np.testing.assert_allclose(x.value, [1.0, 2.0, 3.0]) + np.testing.assert_allclose(y.value, [4.0, 5.0]) + + def test_variable_value_writes_to_scope(self, scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(2, 1) + x.value = np.array([10.0, 20.0, 30.0]) + y.value = np.array([40.0, 50.0]) + np.testing.assert_allclose( + scope.get_values(), [10.0, 20.0, 30.0, 40.0, 50.0] + ) + + +# --------------------------------------------------------------------------- +# Compile same expression twice +# --------------------------------------------------------------------------- + +class TestCompileTwice: + def test_two_compiles_independent(self, scope, rng): + x = scope.Variable(3, 1) + f = sp.sin(x) + fn1 = sp.compile(f) + fn2 = sp.compile(f) + + x.value = np.array([1.0, 2.0, 3.0]) + np.testing.assert_allclose(fn1.forward(), fn2.forward()) + np.testing.assert_allclose( + fn1.jacobian().toarray(), fn2.jacobian().toarray() + ) + + +# --------------------------------------------------------------------------- +# Degenerate / edge cases +# --------------------------------------------------------------------------- + +class TestDegenerateCases: + def test_hstack_single(self, scope, rng): + x = scope.Variable(3, 1) + f = sp.hstack([x]) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_vstack_single(self, scope, rng): + x = scope.Variable(3, 1) + f = sp.vstack([x]) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + 
checker.check_jacobian(random_point(scope, rng)) + + def test_sum_scalar(self, scope, rng): + x = scope.Variable(1, 1) + f = sp.sum(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_scalar_variable(self, scope, rng): + x = scope.Variable(1, 1) + f = sp.sin(x) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_jacobian(x0) + checker.check_hessian(x0, np.array([1.0])) + + def test_identity_expression(self, scope, rng): + """Compiling just a variable.""" + x = scope.Variable(3, 1) + fn = sp.compile(x) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0) + J = fn.jacobian().toarray() + np.testing.assert_allclose(J, np.eye(3)) + + def test_constant_expression(self, scope, rng): + """Compiling a constant (no variables).""" + from sparsediffpy._core._constants import Constant + c = Constant(np.array([1.0, 2.0, 3.0]), (3, 1)) + fn = sp.compile(c) + np.testing.assert_allclose(fn.forward(), [1.0, 2.0, 3.0]) + + def test_nested_transpose(self, scope, rng): + """x.T.T should be x.""" + x = scope.Variable(3, 1) + f = x.T.T + assert f.shape == (3, 1) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_power_one(self, scope, rng): + """x**1 should be identity.""" + x = scope.Variable(3, 1) + f = x ** 1 + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0) + + def test_power_zero(self, scope, rng): + """x**0 should be ones.""" + x = scope.Variable(3, 1) + f = x ** 0 + fn = sp.compile(f) + x0 = random_positive_point(scope, rng) + np.testing.assert_allclose(fn.forward(), np.ones(3)) diff --git a/tests/test_row_vectors.py b/tests/test_row_vectors.py new file mode 100644 index 0000000..0c0ec9d --- /dev/null +++ b/tests/test_row_vectors.py @@ -0,0 +1,140 @@ +"""Tests with row 
vectors (1, n) to exercise different code paths.""" + +import numpy as np +import sparsediffpy as sp +from tests.utils import NumericalDerivativeChecker, random_point + + +class TestRowVectorBasics: + def test_row_variable_forward(self, scope, rng): + r = scope.Variable(1, 4) + fn = sp.compile(r) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), x0) + + def test_row_sin_jacobian(self, scope, rng): + r = scope.Variable(1, 4) + f = sp.sin(r) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_row_sin_hessian(self, scope, rng): + r = scope.Variable(1, 4) + f = sp.sin(r) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(4)) + + +class TestRowVectorIndexing: + def test_row_scalar_index(self, scope, rng): + r = scope.Variable(1, 4) + f = r[2] + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_row_scalar_index_forward(self, scope, rng): + r = scope.Variable(1, 4) + f = r[2] + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), [x0[2]]) + + def test_row_slice_index(self, scope, rng): + r = scope.Variable(1, 4) + f = r[1:3] + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +class TestRowVectorBroadcast: + def test_row_plus_scalar(self, scope, rng): + r = scope.Variable(1, 3) + f = r + 1.0 + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_row_plus_matrix(self, scope, rng): + r = scope.Variable(1, 3) + M = scope.Variable(4, 3) + f = r + M + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def 
test_scalar_times_row(self, scope, rng): + r = scope.Variable(1, 4) + f = 2.5 * r + fn = sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), 2.5 * x0) + + +class TestRowVectorMatmul: + def test_row_times_matrix(self, scope, rng): + """(1,3) @ (3,2) = (1,2)""" + r = scope.Variable(1, 3) + A = rng.standard_normal((3, 2)) + f = r @ A + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_row_times_column(self, scope, rng): + """(1,3) @ (3,1) = (1,1) — dot product.""" + r = scope.Variable(1, 3) + c = scope.Variable(3, 1) + f = r @ c + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng) + checker.check_jacobian(x0) + checker.check_hessian(x0, np.array([1.0])) + + def test_matrix_times_row_transpose(self, scope, rng): + """A @ r.T where r is (1,3) -> r.T is (3,1).""" + r = scope.Variable(1, 3) + A = rng.standard_normal((4, 3)) + f = A @ r.T + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +class TestRowVectorTranspose: + def test_row_transpose_is_column(self, scope, rng): + r = scope.Variable(1, 4) + f = r.T + assert f.shape == (4, 1) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_column_transpose_is_row(self, scope, rng): + c = scope.Variable(4, 1) + f = c.T + assert f.shape == (1, 4) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + +class TestRowVectorReductions: + def test_sum_row(self, scope, rng): + r = scope.Variable(1, 4) + f = sp.sum(r) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_point(scope, rng)) + + def test_sum_row_forward(self, scope, rng): + r = scope.Variable(1, 4) + f = sp.sum(r) + fn = 
sp.compile(f) + x0 = random_point(scope, rng) + np.testing.assert_allclose(fn.forward(), [np.sum(x0)]) diff --git a/tests/test_validation.py b/tests/test_validation.py new file mode 100644 index 0000000..8925b1b --- /dev/null +++ b/tests/test_validation.py @@ -0,0 +1,225 @@ +"""Tests for error/validation: shape mismatches, wrong assignments, mixed scopes.""" + +import numpy as np +import pytest +import scipy.sparse +import sparsediffpy as sp + + +# --------------------------------------------------------------------------- +# Shape mismatch in binary operators +# --------------------------------------------------------------------------- + +class TestShapeMismatch: + def test_add_incompatible(self, scope): + x = scope.Variable(3, 1) + y = scope.Variable(2, 1) + with pytest.raises(ValueError, match="Cannot broadcast"): + x + y + + def test_add_incompatible_matrix(self, scope): + X = scope.Variable(3, 2) + Y = scope.Variable(2, 3) + with pytest.raises(ValueError, match="Cannot broadcast"): + X + Y + + def test_sub_incompatible(self, scope): + x = scope.Variable(3, 1) + y = scope.Variable(2, 1) + with pytest.raises(ValueError, match="Cannot broadcast"): + x - y + + def test_matmul_incompatible(self, scope): + x = scope.Variable(3, 1) + y = scope.Variable(2, 1) + with pytest.raises(ValueError, match="Matmul shape mismatch"): + x @ y + + def test_matmul_inner_dim_mismatch(self, scope): + X = scope.Variable(3, 4) + Y = scope.Variable(2, 3) + with pytest.raises(ValueError, match="Matmul shape mismatch"): + X @ Y + + def test_rel_entr_shape_mismatch(self, scope): + x = scope.Variable(3, 1) + y = scope.Variable(2, 1) + with pytest.raises(ValueError, match="shape mismatch"): + sp.rel_entr(x, y) + + def test_quad_over_lin_non_scalar_z(self, scope): + x = scope.Variable(3, 1) + z = scope.Variable(2, 1) + with pytest.raises(ValueError, match="must be scalar"): + sp.quad_over_lin(x, z) + + +# --------------------------------------------------------------------------- +# 
Atom-specific shape validation +# --------------------------------------------------------------------------- + +class TestAtomValidation: + def test_trace_non_square(self, scope): + X = scope.Variable(3, 2) + with pytest.raises(ValueError, match="square matrix"): + sp.trace(X) + + def test_diag_vec_non_column(self, scope): + X = scope.Variable(3, 2) + with pytest.raises(ValueError, match="column vector"): + sp.diag_vec(X) + + def test_diag_vec_row_vector(self, scope): + r = scope.Variable(1, 3) + with pytest.raises(ValueError, match="column vector"): + sp.diag_vec(r) + + def test_reshape_size_mismatch(self, scope): + x = scope.Variable(3, 1) + with pytest.raises(ValueError, match="Cannot reshape"): + sp.reshape(x, 2, 2) + + def test_quad_form_wrong_Q_size(self, scope): + x = scope.Variable(3, 1) + Q = np.eye(4) + with pytest.raises(ValueError, match="doesn't match"): + sp.quad_form(x, Q) + + def test_quad_form_non_column(self, scope): + x = scope.Variable(1, 3) + Q = np.eye(3) + with pytest.raises(ValueError, match="column vector"): + sp.quad_form(x, Q) + + def test_pow_non_numeric_exponent(self, scope): + x = scope.Variable(3, 1) + with pytest.raises(TypeError, match="constant number"): + x ** "two" + + def test_hstack_empty(self): + with pytest.raises(ValueError, match="at least one"): + sp.hstack([]) + + def test_vstack_empty(self): + with pytest.raises(ValueError, match="at least one"): + sp.vstack([]) + + def test_hstack_mismatched_rows(self, scope): + x = scope.Variable(3, 1) + y = scope.Variable(2, 1) + with pytest.raises(ValueError, match="same number of rows"): + sp.hstack([x, y]) + + def test_vstack_mismatched_cols(self, scope): + X = scope.Variable(3, 2) + Y = scope.Variable(3, 3) + with pytest.raises(ValueError, match="same number of columns"): + sp.vstack([X, Y]) + + def test_restricted_domain_on_index(self, scope): + x = scope.Variable(4, 1) + with pytest.raises(ValueError, match="cannot be applied directly"): + sp.log(x[1:3]) + + with 
pytest.raises(ValueError, match="cannot be applied directly"): + sp.tan(x[1:3]) + + with pytest.raises(ValueError, match="cannot be applied directly"): + sp.atanh(x[1:3]) + + with pytest.raises(ValueError, match="cannot be applied directly"): + sp.entr(x[1:3]) + + +# --------------------------------------------------------------------------- +# Wrong-size value assignment +# --------------------------------------------------------------------------- + +class TestValueAssignment: + def test_variable_wrong_size(self, scope): + x = scope.Variable(3, 1) + with pytest.raises(ValueError, match="Expected 3"): + x.value = np.array([1.0, 2.0]) + + def test_variable_too_many(self, scope): + x = scope.Variable(3, 1) + with pytest.raises(ValueError, match="Expected 3"): + x.value = np.array([1.0, 2.0, 3.0, 4.0]) + + def test_parameter_wrong_size(self, scope): + p = scope.Parameter(2, 2, value=np.eye(2)) + with pytest.raises(ValueError, match="Expected 4"): + p.value = np.array([1.0, 2.0]) + + def test_scope_set_values_wrong_size(self, scope): + x = scope.Variable(3, 1) + with pytest.raises(ValueError, match="Expected flat array of size 3"): + scope.set_values(np.array([1.0, 2.0])) + + def test_parameter_init_wrong_size(self, scope): + with pytest.raises(ValueError, match="elements"): + scope.Parameter(2, 2, value=np.array([1.0, 2.0])) + + +# --------------------------------------------------------------------------- +# Mixed scopes +# --------------------------------------------------------------------------- + +class TestMixedScopes: + def test_mixed_scopes_raises(self): + scope1 = sp.Scope() + scope2 = sp.Scope() + x = scope1.Variable(3, 1) + y = scope2.Variable(3, 1) + f = x + y + with pytest.raises(ValueError, match="same Scope"): + sp.compile(f) + + +# --------------------------------------------------------------------------- +# Invalid shape dimensions +# --------------------------------------------------------------------------- + +class TestInvalidShapes: + def 
test_variable_zero_dim(self, scope): + with pytest.raises(ValueError, match="positive"): + scope.Variable(0, 1) + + def test_variable_negative_dim(self, scope): + with pytest.raises(ValueError, match="positive"): + scope.Variable(-1, 3) + + def test_parameter_zero_dim(self, scope): + with pytest.raises(ValueError, match="positive"): + scope.Parameter(3, 0) + + +# --------------------------------------------------------------------------- +# Index out of bounds +# --------------------------------------------------------------------------- + +class TestIndexOutOfBounds: + def test_scalar_index_out_of_range(self, scope): + x = scope.Variable(3, 1) + with pytest.raises(IndexError, match="out of range"): + x[5] + + def test_negative_index_out_of_range(self, scope): + x = scope.Variable(3, 1) + with pytest.raises(IndexError, match="out of range"): + x[-4] + + def test_fancy_index_out_of_range(self, scope): + x = scope.Variable(3, 1) + with pytest.raises(IndexError, match="out of range"): + x[[0, 5]] + + def test_matrix_index_out_of_range(self, scope): + X = scope.Variable(3, 2) + with pytest.raises(IndexError, match="out of range"): + X[5, 0] + + def test_matrix_col_out_of_range(self, scope): + X = scope.Variable(3, 2) + with pytest.raises(IndexError, match="out of range"): + X[0, 3] diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 0000000..efb56fb --- /dev/null +++ b/tests/utils.py @@ -0,0 +1,119 @@ +"""Test utilities: numerical derivative checker and helpers.""" + +import numpy as np +import sparsediffpy as sp + + +class NumericalDerivativeChecker: + """Check Jacobian and Hessian of a compiled expression against + central finite differences. 
+ + Usage: + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(x0) + checker.check_hessian(x0, weights) + """ + + def __init__(self, compiled_expr, scope, h=1e-5, rtol=1e-5, atol=1e-8): + self._fn = compiled_expr + self._scope = scope + self._h = h + self._rtol = rtol + self._atol = atol + + def check_jacobian(self, x0): + """Compare analytical Jacobian against central finite differences. + + J_approx[:, j] = (f(x + h*e_j) - f(x - h*e_j)) / (2h) + """ + x0 = np.asarray(x0, dtype=np.float64).ravel() + n = x0.size + self._scope.set_values(x0) + + # Analytical Jacobian + J_analytical = self._fn.jacobian().toarray() + m = J_analytical.shape[0] + + # Numerical Jacobian via central differences + J_numerical = np.zeros((m, n)) + for j in range(n): + x_plus = x0.copy() + x_minus = x0.copy() + x_plus[j] += self._h + x_minus[j] -= self._h + + self._scope.set_values(x_plus) + f_plus = self._fn.forward().copy() + + self._scope.set_values(x_minus) + f_minus = self._fn.forward().copy() + + J_numerical[:, j] = (f_plus - f_minus) / (2 * self._h) + + # Restore original point + self._scope.set_values(x0) + + np.testing.assert_allclose( + J_analytical, J_numerical, + rtol=self._rtol, atol=self._atol, + err_msg="Jacobian mismatch between analytical and numerical", + ) + + def check_hessian(self, x0, weights): + """Compare analytical Hessian against numerical Hessian. 
+ + For phi(x) = w^T f(x), the Hessian is computed by perturbing x_j + and recomputing the gradient grad_phi = J^T w: + + H_approx[:, j] = (J(x+h*e_j)^T w - J(x-h*e_j)^T w) / (2h) + """ + x0 = np.asarray(x0, dtype=np.float64).ravel() + weights = np.asarray(weights, dtype=np.float64).ravel() + n = x0.size + + # Analytical Hessian + self._scope.set_values(x0) + H_analytical = self._fn.hessian(weights).toarray() + + # Numerical Hessian via central differences on the gradient + H_numerical = np.zeros((n, n)) + for j in range(n): + x_plus = x0.copy() + x_minus = x0.copy() + x_plus[j] += self._h + x_minus[j] -= self._h + + self._scope.set_values(x_plus) + J_plus = self._fn.jacobian().toarray() + grad_plus = J_plus.T @ weights + + self._scope.set_values(x_minus) + J_minus = self._fn.jacobian().toarray() + grad_minus = J_minus.T @ weights + + H_numerical[:, j] = (grad_plus - grad_minus) / (2 * self._h) + + # Restore original point + self._scope.set_values(x0) + + np.testing.assert_allclose( + H_analytical, H_numerical, + rtol=self._rtol, atol=self._atol, + err_msg="Hessian mismatch between analytical and numerical", + ) + + +def random_point(scope, rng, low=-1.0, high=1.0): + """Set all variables to random values and return the flat vector.""" + n = scope.total_var_size + x0 = rng.uniform(low, high, size=n) + scope.set_values(x0) + return x0 + + +def random_positive_point(scope, rng, low=0.1, high=2.0): + """Set all variables to positive random values (for restricted domains).""" + n = scope.total_var_size + x0 = rng.uniform(low, high, size=n) + scope.set_values(x0) + return x0 From 31819292355a85cadb6bc997daee98edb50d3fc5 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 14 Apr 2026 12:29:08 -0700 Subject: [PATCH 02/17] readme to read --- README.md | 221 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 218 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f515061..b31cd68 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,10 @@ # 
SparseDiffPy -Python bindings for [SparseDiffEngine](https://github.com/SparseDifferentiation/SparseDiffEngine), a C library for computing sparse Jacobians and Hessians. +Python library for computing **sparse Jacobians** and **Hessians** of nonlinear expressions via automatic differentiation. + +SparseDiffPy wraps [SparseDiffEngine](https://github.com/SparseDifferentiation/SparseDiffEngine), a C library that exploits the sparsity structure of expression graphs to compute derivatives efficiently. Instead of building dense Jacobian and Hessian matrices, SparseDiffPy analyzes the expression graph at compile time to determine which entries are structurally nonzero, then computes only those entries. The results are returned as `scipy.sparse.csr_matrix` objects, ready for use in optimization solvers. + +This matters for large-scale nonlinear optimization, where the Jacobian and Hessian are typically very sparse — a variable in one constraint rarely affects all other constraints. Computing full dense matrices wastes both time and memory. ## Installation @@ -8,10 +12,221 @@ Python bindings for [SparseDiffEngine](https://github.com/SparseDifferentiation/ pip install sparsediffpy ``` -## Usage +Requires Python >= 3.11 and NumPy >= 2.0.0. + +### Building from source + +```bash +git clone https://github.com/SparseDifferentiation/SparseDiffPy.git +cd SparseDiffPy +git submodule update --init +pip install -e . +``` + +## Quick start + +```python +import sparsediffpy as sp +import numpy as np + +# 1. Create a scope (owns the variable space) +scope = sp.Scope() + +# 2. Declare variables (always 2D: rows, cols) +x = scope.Variable(3, 1) # column vector with 3 elements + +# 3. Build an expression using operators and functions +f = sp.exp(x) + 2.0 * x + +# 4. Compile (analyzes sparsity — do this once) +fn = sp.compile(f) + +# 5. 
Set variable values and evaluate +x.value = np.array([1.0, 2.0, 3.0]) + +fn.forward() # array([4.71828183, 11.3890561, 26.08553692]) +fn.jacobian() # 3x3 sparse CSR matrix +fn.hessian(weights=np.ones(3)) # 3x3 sparse CSR matrix +``` + +## Core concepts + +### Scope, Variables, and Parameters + +A **Scope** owns the flat variable buffer. All variables must belong to a scope. + +```python +scope = sp.Scope() +x = scope.Variable(3, 1) # 3x1 column vector +Y = scope.Variable(2, 3) # 2x3 matrix +a = scope.Variable(1, 1) # scalar +``` + +**Parameters** are fixed data that can be updated without recompiling: + +```python +A = scope.Parameter(3, 3, value=np.eye(3)) + +f = A @ x +fn = sp.compile(f) + +x.value = np.array([1.0, 2.0, 3.0]) +fn.jacobian() # uses A = eye(3) + +A.value = 2 * np.eye(3) +fn.jacobian() # uses A = 2*eye(3), no recompile needed +``` + +### Building expressions + +Use Python operators and named functions. NumPy arrays and scalars auto-convert to constants: + +```python +x = scope.Variable(3, 1) +A = np.array([[1, 0, 0], [0, 2, 0], [0, 0, 3]], dtype=float) +b = np.array([1.0, 2.0, 3.0]) + +f = A @ x + b # linear +g = sp.sin(x) + sp.exp(x) # nonlinear elementwise +h = x.T @ x # quadratic (scalar output) +``` + +Broadcasting follows NumPy/CVXPY conventions: + +```python +a = scope.Variable(1, 1) # scalar +X = scope.Variable(3, 2) # matrix +r = scope.Variable(1, 2) # row vector +c = scope.Variable(3, 1) # column vector + +f = a * X + r + c # scalar, row, and column all broadcast to (3, 2) +``` + +### Compiling and evaluating + +`sp.compile(f)` analyzes the sparsity pattern of the expression (this is the expensive step — do it once). 
The returned object is cheap to evaluate repeatedly: + +```python +fn = sp.compile(f) + +x.value = some_values +fn.forward() # evaluate f(x) +fn.jacobian() # sparse Jacobian df/dx as csr_matrix +fn.hessian(weights=w) # sparse Hessian of w^T f(x) as csr_matrix +``` + +For a vector-valued function `f: R^n -> R^m`, the **Jacobian** is the `m x n` matrix of partial derivatives. The **Hessian** requires a weight vector `w` of length `m` — it computes the `n x n` Hessian of the scalar function `w^T f(x)`. + +### Solver integration + +In a solver loop, use `scope.set_values()` to write the entire flat variable vector at once: + +```python +scope = sp.Scope() +x = scope.Variable(3, 1) +f = sp.sin(x) +fn = sp.compile(f) + +def eval_f(x_flat): + scope.set_values(x_flat) + return fn.forward() + +def eval_jac(x_flat): + scope.set_values(x_flat) + return fn.jacobian() + +def eval_hess(x_flat, weights): + scope.set_values(x_flat) + return fn.hessian(weights) + +# These functions can be passed directly to scipy.optimize, IPOPT, etc. 
+``` + +With multiple variables, the flat vector concatenates them in declaration order: + +```python +scope = sp.Scope() +x = scope.Variable(3, 1) # occupies flat positions [0, 1, 2] +y = scope.Variable(2, 1) # occupies flat positions [3, 4] + +# In a solver callback: +def eval_jac(z_flat): + scope.set_values(z_flat) # z_flat has length 5 + return fn.jacobian() # returns a sparse matrix with 5 columns +``` + +## Supported operations + +### Arithmetic operators + +| Operator | Description | +|---|---| +| `x + y` | Addition (with broadcasting) | +| `x - y` | Subtraction (with broadcasting) | +| `-x` | Negation | +| `x * y` | Elementwise multiplication (with broadcasting) | +| `x @ y` | Matrix multiplication | +| `x ** p` | Power (constant exponent) | +| `x[i]`, `x[0:3]`, `x[[0, 2]]` | Indexing and slicing | +| `x.T` | Transpose | + +### Elementwise functions + +| Function | Description | Domain | +|---|---|---| +| `sp.exp(x)` | Exponential | all reals | +| `sp.sin(x)` | Sine | all reals | +| `sp.cos(x)` | Cosine | all reals | +| `sp.tan(x)` | Tangent | x != pi/2 + k*pi | +| `sp.sinh(x)` | Hyperbolic sine | all reals | +| `sp.tanh(x)` | Hyperbolic tangent | all reals | +| `sp.asinh(x)` | Inverse hyperbolic sine | all reals | +| `sp.atanh(x)` | Inverse hyperbolic tangent | \|x\| < 1 | +| `sp.log(x)` | Natural logarithm | x > 0 | +| `sp.logistic(x)` | Softplus: log(1 + exp(x)) | all reals | +| `sp.normal_cdf(x)` | Standard normal CDF | all reals | +| `sp.entr(x)` | Entropy: -x log(x) | x > 0 | +| `sp.xexp(x)` | x exp(x) | all reals | +| `sp.power(x, p)` | Power with float exponent | depends on p | + +### Reductions + +| Function | Description | +|---|---| +| `sp.sum(x)` | Sum all elements | +| `sp.sum(x, axis=0)` | Sum along rows | +| `sp.sum(x, axis=1)` | Sum along columns | +| `sp.prod(x)` | Product of all elements | +| `sp.prod(x, axis=0)` | Product along rows | +| `sp.prod(x, axis=1)` | Product along columns | +| `sp.trace(X)` | Matrix trace (square matrices) | + 
+### Structural operations + +| Function | Description | +|---|---| +| `sp.hstack([a, b, c])` | Horizontal concatenation (same row count) | +| `sp.vstack([a, b, c])` | Vertical concatenation (same column count) | +| `sp.reshape(x, d1, d2)` | Reshape (preserves total size) | +| `sp.diag_vec(x)` | Diagonal matrix from column vector | + +### Special functions + +| Function | Description | +|---|---| +| `sp.quad_form(x, Q)` | Quadratic form: x^T Q x | +| `sp.quad_over_lin(x, z)` | sum(x^2) / z | +| `sp.rel_entr(x, y)` | Relative entropy: x log(x/y) | + +## Shapes + +All shapes are 2D tuples `(rows, cols)`, matching the underlying C engine. There is no 1D shorthand — use `Variable(3, 1)` for a column vector, `Variable(1, 3)` for a row vector: ```python -from sparsediffpy import _sparsediffengine +x = scope.Variable(3, 1) # column vector +r = scope.Variable(1, 3) # row vector +M = scope.Variable(3, 3) # matrix +a = scope.Variable(1, 1) # scalar ``` ## License From c0e093699ec7ddb88fe43b75ea15b814aa1cd0c5 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 14 Apr 2026 12:40:48 -0700 Subject: [PATCH 03/17] move some stuff --- sparsediffpy/_core/_constants.py | 34 +-------------- sparsediffpy/_core/_expression.py | 50 ++++++++++++++++++--- sparsediffpy/_core/_functions.py | 68 ++++++++++++++--------------- sparsediffpy/_core/_nodes_affine.py | 3 +- 4 files changed, 81 insertions(+), 74 deletions(-) diff --git a/sparsediffpy/_core/_constants.py b/sparsediffpy/_core/_constants.py index ddbc6c0..5eab192 100644 --- a/sparsediffpy/_core/_constants.py +++ b/sparsediffpy/_core/_constants.py @@ -1,4 +1,4 @@ -"""Constant and SparseConstant expression nodes, plus _wrap_constant.""" +"""Constant and SparseConstant expression nodes.""" import numpy as np import scipy.sparse @@ -47,35 +47,3 @@ def _to_dense_flat(self): shape=self.shape, ).toarray() return dense.ravel(order="F").astype(np.float64) - - -def _wrap_constant(value): - """Wrap a raw value into an expression node. 
- - - Expression subclass -> return as-is - - int / float -> Constant with shape (1, 1) - - np.ndarray 1D (n,) -> Constant with shape (n, 1) (column vector) - - np.ndarray 2D (m, n) -> Constant with shape (m, n) - - scipy.sparse -> SparseConstant - """ - # Avoid circular import: check for Expression base by duck-typing - # (has a .shape attribute and is from our module) - if hasattr(value, "_is_sparsediff_expr"): - return value - - if isinstance(value, (int, float)): - return Constant(np.array([float(value)]), (1, 1)) - - if isinstance(value, np.ndarray): - if value.ndim == 0: - return Constant(np.array([value.item()]), (1, 1)) - if value.ndim == 1: - return Constant(value, (value.shape[0], 1)) - if value.ndim == 2: - return Constant(value, (value.shape[0], value.shape[1])) - raise ValueError(f"Cannot wrap {value.ndim}D array as constant") - - if scipy.sparse.issparse(value): - return SparseConstant(value) - - raise TypeError(f"Cannot convert {type(value).__name__} to expression") diff --git a/sparsediffpy/_core/_expression.py b/sparsediffpy/_core/_expression.py index ad6f9d5..85d7aa7 100644 --- a/sparsediffpy/_core/_expression.py +++ b/sparsediffpy/_core/_expression.py @@ -1,16 +1,13 @@ -"""Expression base class, operator overloading, and dispatch helpers. +"""Expression base class, operator overloading, dispatch helpers, and _wrap_constant. Node types are defined in _nodes_affine.py, _nodes_elementwise.py, _nodes_bivariate.py, and _nodes_other.py. 
""" import numpy as np +import scipy.sparse -from sparsediffpy._core._constants import ( - Constant, - SparseConstant, - _wrap_constant, -) +from sparsediffpy._core._constants import Constant, SparseConstant from sparsediffpy._core._shapes import ( broadcast_shape, check_matmul_shapes, @@ -18,6 +15,47 @@ ) +# --------------------------------------------------------------------------- +# _wrap_constant: converts raw values into expression nodes +# --------------------------------------------------------------------------- + +def _wrap_constant(value): + """Wrap a raw Python/NumPy/SciPy value into an expression node. + + Called by operators and node constructors so users can write + ``x + 1.0`` or ``A @ x`` with raw scalars/arrays. + + - Expression subclass -> return as-is + - int / float -> Constant with shape (1, 1) + - np.ndarray 1D (n,) -> Constant with shape (n, 1) (column vector) + - np.ndarray 2D (m, n) -> Constant with shape (m, n) + - scipy.sparse -> SparseConstant + """ + if hasattr(value, "_is_sparsediff_expr"): + return value + + if isinstance(value, (int, float)): + return Constant(np.array([float(value)]), (1, 1)) + + if isinstance(value, np.ndarray): + if value.ndim == 0: + return Constant(np.array([value.item()]), (1, 1)) + if value.ndim == 1: + return Constant(value, (value.shape[0], 1)) + if value.ndim == 2: + return Constant(value, (value.shape[0], value.shape[1])) + raise ValueError(f"Cannot wrap {value.ndim}D array as constant") + + if scipy.sparse.issparse(value): + return SparseConstant(value) + + raise TypeError(f"Cannot convert {type(value).__name__} to expression") + + +# --------------------------------------------------------------------------- +# Base class +# --------------------------------------------------------------------------- + class Expression: """Base class for all expression tree nodes.""" diff --git a/sparsediffpy/_core/_functions.py b/sparsediffpy/_core/_functions.py index bd308ee..2fed548 100644 --- 
a/sparsediffpy/_core/_functions.py +++ b/sparsediffpy/_core/_functions.py @@ -1,9 +1,15 @@ -"""Module-level named functions: sp.sin, sp.exp, sp.hstack, etc.""" +"""Module-level named functions: sp.sin, sp.exp, sp.hstack, etc. + +Unary functions rely on _UnaryOp.__init__ to call _wrap_constant on the +child, so they don't need to wrap explicitly. Multi-argument functions +and structural ops that do shape validation before constructing nodes +still wrap explicitly. +""" import numpy as np import scipy.sparse -from sparsediffpy._core._constants import _wrap_constant +from sparsediffpy._core._expression import _wrap_constant from sparsediffpy._core._nodes_affine import ( DiagVec, HStack, Reshape, Sum, Trace, Transpose, ) @@ -16,57 +22,52 @@ from sparsediffpy._core._shapes import validate_shape -def _ensure_expr(x): - if hasattr(x, "_is_sparsediff_expr"): - return x - return _wrap_constant(x) - - # --------------------------------------------------------------------------- # Unary elementwise functions +# (_UnaryOp.__init__ handles _wrap_constant) # --------------------------------------------------------------------------- def sin(x): - return Sin(_ensure_expr(x)) + return Sin(x) def cos(x): - return Cos(_ensure_expr(x)) + return Cos(x) def exp(x): - return Exp(_ensure_expr(x)) + return Exp(x) def log(x): - return Log(_ensure_expr(x)) + return Log(x) def tan(x): - return Tan(_ensure_expr(x)) + return Tan(x) def sinh(x): - return Sinh(_ensure_expr(x)) + return Sinh(x) def tanh(x): - return Tanh(_ensure_expr(x)) + return Tanh(x) def asinh(x): - return Asinh(_ensure_expr(x)) + return Asinh(x) def atanh(x): - return Atanh(_ensure_expr(x)) + return Atanh(x) def logistic(x): - return Logistic(_ensure_expr(x)) + return Logistic(x) def normal_cdf(x): - return NormalCdf(_ensure_expr(x)) + return NormalCdf(x) def entr(x): - return Entr(_ensure_expr(x)) + return Entr(x) def xexp(x): - return Xexp(_ensure_expr(x)) + return Xexp(x) def diag_vec(x): - return DiagVec(_ensure_expr(x)) 
+ return DiagVec(x) # --------------------------------------------------------------------------- @@ -74,7 +75,7 @@ def diag_vec(x): # --------------------------------------------------------------------------- def power(x, p): - return Power(_ensure_expr(x), float(p)) + return Power(x, float(p)) def sum(x, axis=None): @@ -85,7 +86,7 @@ def sum(x, axis=None): axis=1: sum along columns (collapse d2) -> (d1, 1) """ c_axis = -1 if axis is None else axis - return Sum(_ensure_expr(x), c_axis) + return Sum(x, c_axis) def prod(x, axis=None): @@ -95,7 +96,6 @@ def prod(x, axis=None): axis=0: product along rows -> (1, d2) axis=1: product along columns -> (d1, 1) """ - x = _ensure_expr(x) if axis is None: return Prod(x) elif axis == 0: @@ -108,11 +108,11 @@ def prod(x, axis=None): def reshape(x, d1, d2): validate_shape(d1, d2) - return Reshape(_ensure_expr(x), (d1, d2)) + return Reshape(x, (d1, d2)) def trace(x): - return Trace(_ensure_expr(x)) + return Trace(x) # --------------------------------------------------------------------------- @@ -124,7 +124,7 @@ def hstack(expressions): Result shape: (d1, sum of all d2). """ - exprs = [_ensure_expr(e) for e in expressions] + exprs = [_wrap_constant(e) for e in expressions] if not exprs: raise ValueError("hstack requires at least one expression") @@ -145,7 +145,7 @@ def vstack(expressions): Implemented as transpose(hstack(transpose(each))). """ - exprs = [_ensure_expr(e) for e in expressions] + exprs = [_wrap_constant(e) for e in expressions] if not exprs: raise ValueError("vstack requires at least one expression") @@ -178,7 +178,7 @@ def quad_form(x, Q): x must be a column vector (n, 1). Q must be a scipy.sparse matrix or np.ndarray of shape (n, n). 
""" - x = _ensure_expr(x) + x = _wrap_constant(x) if x.shape[1] != 1: raise ValueError(f"quad_form: x must be a column vector, got shape {x.shape}") @@ -204,13 +204,13 @@ def quad_form(x, Q): def quad_over_lin(x, z): """sum(x^2) / z where z is a scalar expression.""" - x = _ensure_expr(x) - z = _ensure_expr(z) + x = _wrap_constant(x) + z = _wrap_constant(z) return QuadOverLin(x, z) def rel_entr(x, y): """x * log(x / y) elementwise.""" - x = _ensure_expr(x) - y = _ensure_expr(y) + x = _wrap_constant(x) + y = _wrap_constant(y) return RelEntr(x, y) diff --git a/sparsediffpy/_core/_nodes_affine.py b/sparsediffpy/_core/_nodes_affine.py index fa766bb..bbe71ce 100644 --- a/sparsediffpy/_core/_nodes_affine.py +++ b/sparsediffpy/_core/_nodes_affine.py @@ -2,11 +2,12 @@ import numpy as np -from sparsediffpy._core._expression import Expression +from sparsediffpy._core._expression import Expression, _wrap_constant class _UnaryOp(Expression): def __init__(self, child): + child = _wrap_constant(child) self.child = child self.shape = child.shape From 61f59d0edb8e2365b9d013dbc070c4a5f6153248 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 14 Apr 2026 12:44:45 -0700 Subject: [PATCH 04/17] better file structure --- sparsediffpy/__init__.py | 40 ++--- sparsediffpy/_core/_fn_affine.py | 95 +++++++++++ sparsediffpy/_core/_fn_bivariate.py | 52 +++++++ sparsediffpy/_core/_fn_elementwise.py | 52 +++++++ sparsediffpy/_core/_functions.py | 216 -------------------------- 5 files changed, 213 insertions(+), 242 deletions(-) create mode 100644 sparsediffpy/_core/_fn_affine.py create mode 100644 sparsediffpy/_core/_fn_bivariate.py create mode 100644 sparsediffpy/_core/_fn_elementwise.py delete mode 100644 sparsediffpy/_core/_functions.py diff --git a/sparsediffpy/__init__.py b/sparsediffpy/__init__.py index 1f361fc..1d656a3 100644 --- a/sparsediffpy/__init__.py +++ b/sparsediffpy/__init__.py @@ -24,30 +24,18 @@ # Compile from sparsediffpy._core._compile import compile # noqa: F401 -# Named 
functions -from sparsediffpy._core._functions import ( # noqa: F401 - sin, - cos, - exp, - log, - tan, - sinh, - tanh, - asinh, - atanh, - logistic, - normal_cdf, - entr, - xexp, - diag_vec, - power, - sum, - prod, - reshape, - trace, - hstack, - vstack, - quad_form, - quad_over_lin, - rel_entr, +# Elementwise functions +from sparsediffpy._core._fn_elementwise import ( # noqa: F401 + sin, cos, exp, log, tan, sinh, tanh, asinh, atanh, + logistic, normal_cdf, entr, xexp, power, +) + +# Affine / structural functions +from sparsediffpy._core._fn_affine import ( # noqa: F401 + diag_vec, trace, reshape, sum, prod, hstack, vstack, +) + +# Bivariate / special functions +from sparsediffpy._core._fn_bivariate import ( # noqa: F401 + quad_form, quad_over_lin, rel_entr, +) diff --git a/sparsediffpy/_core/_fn_affine.py b/sparsediffpy/_core/_fn_affine.py new file mode 100644 index 0000000..2ee0ab6 --- /dev/null +++ b/sparsediffpy/_core/_fn_affine.py @@ -0,0 +1,95 @@ +"""Affine named functions: sp.sum, sp.prod, sp.reshape, sp.hstack, etc.""" + +import builtins as _builtins + +from sparsediffpy._core._expression import _wrap_constant +from sparsediffpy._core._nodes_affine import ( + DiagVec, HStack, Reshape, Sum, Trace, Transpose, +) +from sparsediffpy._core._nodes_other import Prod, ProdAxisOne, ProdAxisZero +from sparsediffpy._core._shapes import validate_shape + +_builtin_sum = _builtins.sum + + +def diag_vec(x): + return DiagVec(x) + +def trace(x): + return Trace(x) + +def reshape(x, d1, d2): + validate_shape(d1, d2) + return Reshape(x, (d1, d2)) + + +def sum(x, axis=None): + """Sum reduction. + + axis=None: sum all elements -> (1,1) + axis=0: sum along rows (collapse d1) -> (1, d2) + axis=1: sum along columns (collapse d2) -> (d1, 1) + """ + c_axis = -1 if axis is None else axis + return Sum(x, c_axis) + + +def prod(x, axis=None): + """Product reduction. 
+ + axis=None: product of all elements -> (1,1) + axis=0: product along rows -> (1, d2) + axis=1: product along columns -> (d1, 1) + """ + if axis is None: + return Prod(x) + elif axis == 0: + return ProdAxisZero(x) + elif axis == 1: + return ProdAxisOne(x) + else: + raise ValueError(f"Invalid axis {axis}, must be None, 0, or 1") + + +def hstack(expressions): + """Horizontally stack expressions. All must have the same d1 (rows). + + Result shape: (d1, sum of all d2). + """ + exprs = [_wrap_constant(e) for e in expressions] + if not exprs: + raise ValueError("hstack requires at least one expression") + + d1 = exprs[0].shape[0] + for e in exprs[1:]: + if e.shape[0] != d1: + raise ValueError( + f"hstack: all expressions must have the same number of rows, " + f"got {d1} and {e.shape[0]}" + ) + + total_d2 = _builtin_sum(e.shape[1] for e in exprs) + return HStack(exprs, (d1, total_d2)) + + +def vstack(expressions): + """Vertically stack expressions. All must have the same d2 (columns). + + Implemented as transpose(hstack(transpose(each))). 
+ """ + exprs = [_wrap_constant(e) for e in expressions] + if not exprs: + raise ValueError("vstack requires at least one expression") + + d2 = exprs[0].shape[1] + for e in exprs[1:]: + if e.shape[1] != d2: + raise ValueError( + f"vstack: all expressions must have the same number of columns, " + f"got {d2} and {e.shape[1]}" + ) + + transposed = [Transpose(e) for e in exprs] + total_d1 = _builtin_sum(e.shape[0] for e in exprs) + h = HStack(transposed, (d2, total_d1)) + return Transpose(h) diff --git a/sparsediffpy/_core/_fn_bivariate.py b/sparsediffpy/_core/_fn_bivariate.py new file mode 100644 index 0000000..4ae3487 --- /dev/null +++ b/sparsediffpy/_core/_fn_bivariate.py @@ -0,0 +1,52 @@ +"""Bivariate named functions: sp.quad_form, sp.quad_over_lin, sp.rel_entr.""" + +import numpy as np +import scipy.sparse + +from sparsediffpy._core._expression import _wrap_constant +from sparsediffpy._core._nodes_bivariate import QuadOverLin, RelEntr +from sparsediffpy._core._nodes_other import QuadForm + + +def quad_form(x, Q): + """Quadratic form x' Q x. + + x must be a column vector (n, 1). + Q must be a scipy.sparse matrix or np.ndarray of shape (n, n). 
+ """ + x = _wrap_constant(x) + if x.shape[1] != 1: + raise ValueError(f"quad_form: x must be a column vector, got shape {x.shape}") + + if not scipy.sparse.issparse(Q): + Q = scipy.sparse.csr_matrix(Q) + else: + Q = Q.tocsr() + + n = x.shape[0] + if Q.shape != (n, n): + raise ValueError( + f"quad_form: Q shape {Q.shape} doesn't match x shape {x.shape}" + ) + + return QuadForm( + x, + Q_csr_data=np.asarray(Q.data, dtype=np.float64), + Q_csr_indices=np.asarray(Q.indices, dtype=np.int32), + Q_csr_indptr=np.asarray(Q.indptr, dtype=np.int32), + Q_shape=Q.shape, + ) + + +def quad_over_lin(x, z): + """sum(x^2) / z where z is a scalar expression.""" + x = _wrap_constant(x) + z = _wrap_constant(z) + return QuadOverLin(x, z) + + +def rel_entr(x, y): + """x * log(x / y) elementwise.""" + x = _wrap_constant(x) + y = _wrap_constant(y) + return RelEntr(x, y) diff --git a/sparsediffpy/_core/_fn_elementwise.py b/sparsediffpy/_core/_fn_elementwise.py new file mode 100644 index 0000000..3227d7e --- /dev/null +++ b/sparsediffpy/_core/_fn_elementwise.py @@ -0,0 +1,52 @@ +"""Elementwise named functions: sp.sin, sp.exp, sp.log, etc. + +_UnaryOp.__init__ handles _wrap_constant, so these are one-liners. 
+""" + +from sparsediffpy._core._nodes_elementwise import ( + Asinh, Atanh, Cos, Entr, Exp, Log, Logistic, NormalCdf, Power, + Sin, Sinh, Tan, Tanh, Xexp, +) + + +def sin(x): + return Sin(x) + +def cos(x): + return Cos(x) + +def exp(x): + return Exp(x) + +def log(x): + return Log(x) + +def tan(x): + return Tan(x) + +def sinh(x): + return Sinh(x) + +def tanh(x): + return Tanh(x) + +def asinh(x): + return Asinh(x) + +def atanh(x): + return Atanh(x) + +def logistic(x): + return Logistic(x) + +def normal_cdf(x): + return NormalCdf(x) + +def entr(x): + return Entr(x) + +def xexp(x): + return Xexp(x) + +def power(x, p): + return Power(x, float(p)) diff --git a/sparsediffpy/_core/_functions.py b/sparsediffpy/_core/_functions.py deleted file mode 100644 index 2fed548..0000000 --- a/sparsediffpy/_core/_functions.py +++ /dev/null @@ -1,216 +0,0 @@ -"""Module-level named functions: sp.sin, sp.exp, sp.hstack, etc. - -Unary functions rely on _UnaryOp.__init__ to call _wrap_constant on the -child, so they don't need to wrap explicitly. Multi-argument functions -and structural ops that do shape validation before constructing nodes -still wrap explicitly. 
-""" - -import numpy as np -import scipy.sparse - -from sparsediffpy._core._expression import _wrap_constant -from sparsediffpy._core._nodes_affine import ( - DiagVec, HStack, Reshape, Sum, Trace, Transpose, -) -from sparsediffpy._core._nodes_bivariate import QuadOverLin, RelEntr -from sparsediffpy._core._nodes_elementwise import ( - Asinh, Atanh, Cos, Entr, Exp, Log, Logistic, NormalCdf, Power, - Sin, Sinh, Tan, Tanh, Xexp, -) -from sparsediffpy._core._nodes_other import Prod, ProdAxisOne, ProdAxisZero, QuadForm -from sparsediffpy._core._shapes import validate_shape - - -# --------------------------------------------------------------------------- -# Unary elementwise functions -# (_UnaryOp.__init__ handles _wrap_constant) -# --------------------------------------------------------------------------- - -def sin(x): - return Sin(x) - -def cos(x): - return Cos(x) - -def exp(x): - return Exp(x) - -def log(x): - return Log(x) - -def tan(x): - return Tan(x) - -def sinh(x): - return Sinh(x) - -def tanh(x): - return Tanh(x) - -def asinh(x): - return Asinh(x) - -def atanh(x): - return Atanh(x) - -def logistic(x): - return Logistic(x) - -def normal_cdf(x): - return NormalCdf(x) - -def entr(x): - return Entr(x) - -def xexp(x): - return Xexp(x) - -def diag_vec(x): - return DiagVec(x) - - -# --------------------------------------------------------------------------- -# Unary with extra arguments -# --------------------------------------------------------------------------- - -def power(x, p): - return Power(x, float(p)) - - -def sum(x, axis=None): - """Sum reduction. - - axis=None: sum all elements -> (1,1) - axis=0: sum along rows (collapse d1) -> (1, d2) - axis=1: sum along columns (collapse d2) -> (d1, 1) - """ - c_axis = -1 if axis is None else axis - return Sum(x, c_axis) - - -def prod(x, axis=None): - """Product reduction. 
- - axis=None: product of all elements -> (1,1) - axis=0: product along rows -> (1, d2) - axis=1: product along columns -> (d1, 1) - """ - if axis is None: - return Prod(x) - elif axis == 0: - return ProdAxisZero(x) - elif axis == 1: - return ProdAxisOne(x) - else: - raise ValueError(f"Invalid axis {axis}, must be None, 0, or 1") - - -def reshape(x, d1, d2): - validate_shape(d1, d2) - return Reshape(x, (d1, d2)) - - -def trace(x): - return Trace(x) - - -# --------------------------------------------------------------------------- -# Structural -# --------------------------------------------------------------------------- - -def hstack(expressions): - """Horizontally stack expressions. All must have the same d1 (rows). - - Result shape: (d1, sum of all d2). - """ - exprs = [_wrap_constant(e) for e in expressions] - if not exprs: - raise ValueError("hstack requires at least one expression") - - d1 = exprs[0].shape[0] - for e in exprs[1:]: - if e.shape[0] != d1: - raise ValueError( - f"hstack: all expressions must have the same number of rows, " - f"got {d1} and {e.shape[0]}" - ) - - total_d2 = builtins_sum(e.shape[1] for e in exprs) - return HStack(exprs, (d1, total_d2)) - - -def vstack(expressions): - """Vertically stack expressions. All must have the same d2 (columns). - - Implemented as transpose(hstack(transpose(each))). 
- """ - exprs = [_wrap_constant(e) for e in expressions] - if not exprs: - raise ValueError("vstack requires at least one expression") - - d2 = exprs[0].shape[1] - for e in exprs[1:]: - if e.shape[1] != d2: - raise ValueError( - f"vstack: all expressions must have the same number of columns, " - f"got {d2} and {e.shape[1]}" - ) - - transposed = [Transpose(e) for e in exprs] - total_d1 = builtins_sum(e.shape[0] for e in exprs) - h = HStack(transposed, (d2, total_d1)) - return Transpose(h) - - -# Keep a reference to Python's built-in sum (shadowed by our sum function) -import builtins as _builtins -builtins_sum = _builtins.sum - - -# --------------------------------------------------------------------------- -# Special functions -# --------------------------------------------------------------------------- - -def quad_form(x, Q): - """Quadratic form x' Q x. - - x must be a column vector (n, 1). - Q must be a scipy.sparse matrix or np.ndarray of shape (n, n). - """ - x = _wrap_constant(x) - if x.shape[1] != 1: - raise ValueError(f"quad_form: x must be a column vector, got shape {x.shape}") - - if not scipy.sparse.issparse(Q): - Q = scipy.sparse.csr_matrix(Q) - else: - Q = Q.tocsr() - - n = x.shape[0] - if Q.shape != (n, n): - raise ValueError( - f"quad_form: Q shape {Q.shape} doesn't match x shape {x.shape}" - ) - - return QuadForm( - x, - Q_csr_data=np.asarray(Q.data, dtype=np.float64), - Q_csr_indices=np.asarray(Q.indices, dtype=np.int32), - Q_csr_indptr=np.asarray(Q.indptr, dtype=np.int32), - Q_shape=Q.shape, - ) - - -def quad_over_lin(x, z): - """sum(x^2) / z where z is a scalar expression.""" - x = _wrap_constant(x) - z = _wrap_constant(z) - return QuadOverLin(x, z) - - -def rel_entr(x, y): - """x * log(x / y) elementwise.""" - x = _wrap_constant(x) - y = _wrap_constant(y) - return RelEntr(x, y) From 0f2473fba4e722bfbadcf7f4c262e088392ed859 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 14 Apr 2026 12:50:56 -0700 Subject: [PATCH 05/17] more versions of 
relative entropy --- sparsediffpy/_core/_nodes_bivariate.py | 22 +++++-- tests/other/test_rel_entr.py | 86 +++++++++++++++++++++++++- tests/test_validation.py | 2 +- 3 files changed, 102 insertions(+), 8 deletions(-) diff --git a/sparsediffpy/_core/_nodes_bivariate.py b/sparsediffpy/_core/_nodes_bivariate.py index 3ecd70a..26a97d0 100644 --- a/sparsediffpy/_core/_nodes_bivariate.py +++ b/sparsediffpy/_core/_nodes_bivariate.py @@ -32,10 +32,24 @@ def __init__(self, x, z): class RelEntr(Expression): - """x * log(x / y) elementwise.""" + """x * log(x / y) elementwise. + + Supports three variants (auto-dispatched by the C layer): + - Both same shape: elementwise + - Scalar x, vector y: x * log(x / y_i) for each i + - Vector x, scalar y: x_i * log(x_i / y) for each i + """ def __init__(self, x, y): - if x.shape != y.shape: - raise ValueError(f"rel_entr: shape mismatch {x.shape} vs {y.shape}") + if x.shape == y.shape: + self.shape = x.shape + elif is_scalar(x.shape): + self.shape = y.shape + elif is_scalar(y.shape): + self.shape = x.shape + else: + raise ValueError( + f"rel_entr: shapes must match or one must be scalar, " + f"got {x.shape} and {y.shape}" + ) self.x = x self.y = y - self.shape = x.shape diff --git a/tests/other/test_rel_entr.py b/tests/other/test_rel_entr.py index c2dc531..d4b51bc 100644 --- a/tests/other/test_rel_entr.py +++ b/tests/other/test_rel_entr.py @@ -1,9 +1,12 @@ import numpy as np +import pytest import sparsediffpy as sp from tests.utils import NumericalDerivativeChecker, random_positive_point -def test_rel_entr_jacobian(scope, rng): +# --- Vector-vector (both same shape) --- + +def test_rel_entr_vector_jacobian(scope, rng): x = scope.Variable(3, 1) y = scope.Variable(3, 1) f = sp.rel_entr(x, y) @@ -12,7 +15,7 @@ def test_rel_entr_jacobian(scope, rng): checker.check_jacobian(random_positive_point(scope, rng)) -def test_rel_entr_hessian(scope, rng): +def test_rel_entr_vector_hessian(scope, rng): x = scope.Variable(3, 1) y = scope.Variable(3, 1) 
f = sp.rel_entr(x, y) @@ -22,7 +25,7 @@ def test_rel_entr_hessian(scope, rng): checker.check_hessian(x0, rng.standard_normal(3)) -def test_rel_entr_forward(scope, rng): +def test_rel_entr_vector_forward(scope, rng): x = scope.Variable(3, 1) y = scope.Variable(3, 1) f = sp.rel_entr(x, y) @@ -32,3 +35,80 @@ def test_rel_entr_forward(scope, rng): y_val = y.value expected = x_val * np.log(x_val / y_val) np.testing.assert_allclose(fn.forward(), expected) + + +# --- Scalar x, vector y --- + +def test_rel_entr_scalar_vector_jacobian(scope, rng): + x = scope.Variable(1, 1) + y = scope.Variable(3, 1) + f = sp.rel_entr(x, y) + assert f.shape == (3, 1) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_positive_point(scope, rng)) + + +def test_rel_entr_scalar_vector_hessian(scope, rng): + x = scope.Variable(1, 1) + y = scope.Variable(3, 1) + f = sp.rel_entr(x, y) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_positive_point(scope, rng) + checker.check_hessian(x0, rng.standard_normal(3)) + + +def test_rel_entr_scalar_vector_forward(scope, rng): + x = scope.Variable(1, 1) + y = scope.Variable(3, 1) + f = sp.rel_entr(x, y) + fn = sp.compile(f) + x0 = random_positive_point(scope, rng) + x_val = x.value[0] + y_val = y.value + expected = x_val * np.log(x_val / y_val) + np.testing.assert_allclose(fn.forward(), expected) + + +# --- Vector x, scalar y --- + +def test_rel_entr_vector_scalar_jacobian(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(1, 1) + f = sp.rel_entr(x, y) + assert f.shape == (3, 1) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + checker.check_jacobian(random_positive_point(scope, rng)) + + +def test_rel_entr_vector_scalar_hessian(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(1, 1) + f = sp.rel_entr(x, y) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_positive_point(scope, rng) + 
checker.check_hessian(x0, rng.standard_normal(3)) + + +def test_rel_entr_vector_scalar_forward(scope, rng): + x = scope.Variable(3, 1) + y = scope.Variable(1, 1) + f = sp.rel_entr(x, y) + fn = sp.compile(f) + x0 = random_positive_point(scope, rng) + x_val = x.value + y_val = y.value[0] + expected = x_val * np.log(x_val / y_val) + np.testing.assert_allclose(fn.forward(), expected) + + +# --- Shape mismatch --- + +def test_rel_entr_incompatible_shapes(scope): + x = scope.Variable(3, 1) + y = scope.Variable(2, 1) + with pytest.raises(ValueError, match="shapes must match or one must be scalar"): + sp.rel_entr(x, y) diff --git a/tests/test_validation.py b/tests/test_validation.py index 8925b1b..9e5f695 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -44,7 +44,7 @@ def test_matmul_inner_dim_mismatch(self, scope): def test_rel_entr_shape_mismatch(self, scope): x = scope.Variable(3, 1) y = scope.Variable(2, 1) - with pytest.raises(ValueError, match="shape mismatch"): + with pytest.raises(ValueError, match="shapes must match or one must be scalar"): sp.rel_entr(x, y) def test_quad_over_lin_non_scalar_z(self, scope): From d45aa09c66788aa65ad6aa5a92b2f300d1aa5768 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 14 Apr 2026 13:02:23 -0700 Subject: [PATCH 06/17] remove unnecessary code --- sparsediffpy/_core/_expression.py | 19 ++++--------- sparsediffpy/_core/_nodes_affine.py | 5 ++++ sparsediffpy/_core/_nodes_bivariate.py | 31 ++++++++++++++++++++- sparsediffpy/_core/_nodes_other.py | 20 ++++++++++++++ tests/test_validation.py | 38 ++++++++++++++++++++++++++ 5 files changed, 99 insertions(+), 14 deletions(-) diff --git a/sparsediffpy/_core/_expression.py b/sparsediffpy/_core/_expression.py index 85d7aa7..f3321ea 100644 --- a/sparsediffpy/_core/_expression.py +++ b/sparsediffpy/_core/_expression.py @@ -150,30 +150,23 @@ def size(self): # Operator dispatch helpers # --------------------------------------------------------------------------- -def 
_maybe_broadcast(node, target_shape): - from sparsediffpy._core._nodes_affine import Broadcast - if node.shape == target_shape: - return node - return Broadcast(node, target_shape) - - def _is_param_like(node): from sparsediffpy._core._scope import Parameter return isinstance(node, (Constant, SparseConstant, Parameter)) def _make_add(left, right): - from sparsediffpy._core._nodes_affine import Add + from sparsediffpy._core._nodes_affine import Add, Broadcast result_shape, left_bc, right_bc = broadcast_shape(left.shape, right.shape) if left_bc: - left = _maybe_broadcast(left, result_shape) + left = Broadcast(left, result_shape) if right_bc: - right = _maybe_broadcast(right, result_shape) + right = Broadcast(right, result_shape) return Add(left, right) def _make_mul(left, right): - from sparsediffpy._core._nodes_affine import ParamScalarMult, ParamVectorMult + from sparsediffpy._core._nodes_affine import Broadcast, ParamScalarMult, ParamVectorMult from sparsediffpy._core._nodes_bivariate import Multiply from sparsediffpy._core._scope import Parameter @@ -189,9 +182,9 @@ def _make_mul(left, right): result_shape, left_bc, right_bc = broadcast_shape(left.shape, right.shape) if left_bc: - left = _maybe_broadcast(left, result_shape) + left = Broadcast(left, result_shape) if right_bc: - right = _maybe_broadcast(right, result_shape) + right = Broadcast(right, result_shape) if _is_param_like(left): return ParamVectorMult(left, right) diff --git a/sparsediffpy/_core/_nodes_affine.py b/sparsediffpy/_core/_nodes_affine.py index bbe71ce..d99eeb0 100644 --- a/sparsediffpy/_core/_nodes_affine.py +++ b/sparsediffpy/_core/_nodes_affine.py @@ -55,6 +55,11 @@ def __init__(self, child, new_shape): class Broadcast(Expression): """Broadcast scalar/row/column to a target shape.""" def __init__(self, child, target_shape): + if child.shape == target_shape: + raise ValueError( + f"Broadcast is unnecessary: child shape {child.shape} " + f"already matches target {target_shape}" + ) self.child 
= child self.shape = target_shape diff --git a/sparsediffpy/_core/_nodes_bivariate.py b/sparsediffpy/_core/_nodes_bivariate.py index 26a97d0..516cce1 100644 --- a/sparsediffpy/_core/_nodes_bivariate.py +++ b/sparsediffpy/_core/_nodes_bivariate.py @@ -4,6 +4,20 @@ from sparsediffpy._core._shapes import is_scalar +def _expr_contains_variable(expr, var): + """Check if a specific Variable object appears anywhere in an expression tree.""" + if expr is var: + return True + for attr in ("child", "left", "right", "x", "y", "z", "param_expr", "matrix_expr"): + child = getattr(expr, attr, None) + if child is not None and _expr_contains_variable(child, var): + return True + for child in getattr(expr, "children", []): + if _expr_contains_variable(child, var): + return True + return False + + class Multiply(Expression): """Elementwise multiply (both operands are variable-dependent).""" def __init__(self, left, right): @@ -22,10 +36,25 @@ def __init__(self, left, right, result_shape): class QuadOverLin(Expression): - """sum(x^2) / z where z is a scalar.""" + """sum(x^2) / z where z is a scalar variable. + + z must be a plain Variable and must not appear in x. + """ def __init__(self, x, z): if not is_scalar(z.shape): raise ValueError(f"quad_over_lin: z must be scalar, got shape {z.shape}") + from sparsediffpy._core._scope import Variable + if not isinstance(z, Variable): + raise ValueError( + "quad_over_lin: z (second argument) must be a plain Variable. " + "The C engine requires this — compositions like quad_over_lin(x, f(y)) " + "are not supported." + ) + if _expr_contains_variable(x, z): + raise ValueError( + "quad_over_lin: the denominator variable z must not appear in " + "the numerator x. Use separate variables for numerator and denominator." 
+ ) self.x = x self.z = z self.shape = (1, 1) diff --git a/sparsediffpy/_core/_nodes_other.py b/sparsediffpy/_core/_nodes_other.py index 63cceb3..6eccfed 100644 --- a/sparsediffpy/_core/_nodes_other.py +++ b/sparsediffpy/_core/_nodes_other.py @@ -14,9 +14,27 @@ def __init__(self, child, Q_csr_data, Q_csr_indices, Q_csr_indptr, Q_shape): self.shape = (1, 1) +def _check_prod_child_is_variable(child): + """Require prod's argument to be a plain Variable. + + Temporary limitation: the C engine does not implement the chain rule + for prod, so it only works correctly when the argument is a variable + (Jacobian is identity-like). Compositions like prod(f(x)) will give + wrong derivatives. + """ + from sparsediffpy._core._scope import Variable + if not isinstance(child, Variable): + raise ValueError( + "prod requires its argument to be a plain Variable. " + "The C engine does not currently implement the chain rule for prod, " + "so compositions like prod(f(x)) are not supported." + ) + + class Prod(Expression): """Product of all elements -> (1, 1).""" def __init__(self, child): + _check_prod_child_is_variable(child) self.child = child self.shape = (1, 1) @@ -24,6 +42,7 @@ def __init__(self, child): class ProdAxisZero(Expression): """Product along axis 0 -> (1, d2).""" def __init__(self, child): + _check_prod_child_is_variable(child) self.child = child self.shape = (1, child.shape[1]) @@ -31,5 +50,6 @@ def __init__(self, child): class ProdAxisOne(Expression): """Product along axis 1 -> (1, d1). 
C layer returns row vector.""" def __init__(self, child): + _check_prod_child_is_variable(child) self.child = child self.shape = (1, child.shape[0]) diff --git a/tests/test_validation.py b/tests/test_validation.py index 9e5f695..9d3068b 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -130,6 +130,44 @@ def test_restricted_domain_on_index(self, scope): with pytest.raises(ValueError, match="cannot be applied directly"): sp.entr(x[1:3]) + def test_quad_over_lin_z_must_be_variable(self, scope): + x = scope.Variable(3, 1) + z = scope.Variable(1, 1) + # This should work — z is a plain variable, not in numerator + sp.quad_over_lin(x, z) + + # This should fail — z is an expression, not a plain variable + with pytest.raises(ValueError, match="must be a plain Variable"): + sp.quad_over_lin(x, sp.exp(z)) + + def test_quad_over_lin_z_not_in_numerator(self, scope): + x = scope.Variable(3, 1) + z = scope.Variable(1, 1) + # z appears in numerator via broadcast: x + z + with pytest.raises(ValueError, match="denominator variable z must not appear in the numerator"): + sp.quad_over_lin(x + z, z) + + # z appears directly as scalar in numerator + with pytest.raises(ValueError, match="denominator variable z must not appear in the numerator"): + sp.quad_over_lin(z, z) + + def test_prod_must_be_variable(self, scope): + x = scope.Variable(3, 1) + # This should work — x is a plain variable + sp.prod(x) + + # This should fail — argument is a composition + with pytest.raises(ValueError, match="plain Variable"): + sp.prod(sp.sin(x)) + + def test_prod_axis_must_be_variable(self, scope): + X = scope.Variable(3, 2) + sp.prod(X, axis=0) + sp.prod(X, axis=1) + + with pytest.raises(ValueError, match="plain Variable"): + sp.prod(sp.sin(X), axis=0) + # --------------------------------------------------------------------------- # Wrong-size value assignment From c24a93478a8ce598559e747c981dd489a3c21980 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 14 Apr 2026 13:06:44 -0700 
Subject: [PATCH 07/17] remove unused functions --- sparsediffpy/_core/_registry.py | 7 ------- sparsediffpy/_core/_shapes.py | 7 ------- 2 files changed, 14 deletions(-) diff --git a/sparsediffpy/_core/_registry.py b/sparsediffpy/_core/_registry.py index 17a5e3d..1a18de5 100644 --- a/sparsediffpy/_core/_registry.py +++ b/sparsediffpy/_core/_registry.py @@ -113,13 +113,6 @@ def convert_quad_form(node, child_caps): ) -def convert_param_scalar_mult(node, child_caps): - return _C.make_param_scalar_mult(child_caps[0], child_caps[1]) - - -def convert_param_vector_mult(node, child_caps): - return _C.make_param_vector_mult(child_caps[0], child_caps[1]) - # --------------------------------------------------------------------------- # Registry dict diff --git a/sparsediffpy/_core/_shapes.py b/sparsediffpy/_core/_shapes.py index fae16b3..8554ddd 100644 --- a/sparsediffpy/_core/_shapes.py +++ b/sparsediffpy/_core/_shapes.py @@ -17,13 +17,6 @@ def is_scalar(shape): return shape == (1, 1) -def is_column(shape): - return shape[1] == 1 - - -def is_row(shape): - return shape[0] == 1 - def broadcast_shape(left_shape, right_shape): """Compute broadcast result shape for elementwise operations. From 7250295abbcba8ea0ce35a5641a096bf0a6e8827 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 14 Apr 2026 13:23:30 -0700 Subject: [PATCH 08/17] remove lazy imports --- sparsediffpy/_core/_dispatch.py | 147 +++++++++++++++++++++++ sparsediffpy/_core/_expression.py | 173 +++------------------------- sparsediffpy/_core/_fn_bivariate.py | 16 ++- 3 files changed, 175 insertions(+), 161 deletions(-) create mode 100644 sparsediffpy/_core/_dispatch.py diff --git a/sparsediffpy/_core/_dispatch.py b/sparsediffpy/_core/_dispatch.py new file mode 100644 index 0000000..70c34b2 --- /dev/null +++ b/sparsediffpy/_core/_dispatch.py @@ -0,0 +1,147 @@ +"""Operator dispatch: routes +, -, *, @, [] to the correct expression nodes. 
+ +Separated from _expression.py to avoid circular imports — this module +imports from both _expression and _nodes_*, while _expression.py imports it +only at the bottom of the module (after Expression is fully defined). +""" + +import numpy as np + +from sparsediffpy._core._constants import Constant, SparseConstant +from sparsediffpy._core._expression import _wrap_constant +from sparsediffpy._core._nodes_affine import ( + Add, Broadcast, Index, LeftMatMul, Neg, ParamScalarMult, + ParamVectorMult, RightMatMul, Transpose, +) +from sparsediffpy._core._nodes_bivariate import MatMul, Multiply +from sparsediffpy._core._nodes_elementwise import Power +from sparsediffpy._core._shapes import broadcast_shape, check_matmul_shapes, is_scalar + + +def _is_param_like(node): + # Lazy import to avoid circular: _scope -> _expression -> _dispatch -> _scope + from sparsediffpy._core._scope import Parameter + return isinstance(node, (Constant, SparseConstant, Parameter)) + + +def make_add(left, right): + result_shape, left_bc, right_bc = broadcast_shape(left.shape, right.shape) + if left_bc: + left = Broadcast(left, result_shape) + if right_bc: + right = Broadcast(right, result_shape) + return Add(left, right) + + +def make_sub(left, right): + return make_add(left, Neg(right)) + + +def make_neg(node): + return Neg(node) + + +def make_mul(left, right): + if _is_param_like(left) and is_scalar(left.shape): + return ParamScalarMult(left, right) + if _is_param_like(right) and is_scalar(right.shape): + return ParamScalarMult(right, left) + + if _is_param_like(left) and left.shape == right.shape: + return ParamVectorMult(left, right) + if _is_param_like(right) and right.shape == left.shape: + return ParamVectorMult(right, left) + + result_shape, left_bc, right_bc = broadcast_shape(left.shape, right.shape) + if left_bc: + left = Broadcast(left, result_shape) + if right_bc: + right = Broadcast(right, result_shape) + + if _is_param_like(left): + return ParamVectorMult(left, right) + 
if _is_param_like(right): + return ParamVectorMult(right, left) + + return Multiply(left, right) + + +def make_matmul(left, right): + result_shape = check_matmul_shapes(left.shape, right.shape) + + if _is_param_like(left) and not _is_param_like(right): + return LeftMatMul(left, right, result_shape) + if _is_param_like(right) and not _is_param_like(left): + return RightMatMul(right, left, result_shape) + return MatMul(left, right, result_shape) + + +def make_pow(node, exponent): + if not isinstance(exponent, (int, float)): + raise TypeError("Exponent must be a constant number") + return Power(node, float(exponent)) + + +def make_transpose(node): + return Transpose(node) + + +def make_index(node, key): + d1, d2 = node.shape + + if isinstance(key, tuple): + if len(key) != 2: + raise IndexError("Only 1D or 2D indexing supported") + row_key, col_key = key + row_indices = _resolve_axis_index(row_key, d1) + col_indices = _resolve_axis_index(col_key, d2) + flat_indices = [] + for c in col_indices: + for r in row_indices: + flat_indices.append(r + c * d1) + out_d1 = len(row_indices) + out_d2 = len(col_indices) + else: + if d2 == 1: + indices = _resolve_axis_index(key, d1) + flat_indices = indices + out_d1 = len(indices) + out_d2 = 1 + elif d1 == 1: + indices = _resolve_axis_index(key, d2) + flat_indices = [i * d1 for i in indices] + out_d1 = 1 + out_d2 = len(indices) + else: + total = d1 * d2 + indices = _resolve_axis_index(key, total) + flat_indices = indices + out_d1 = len(indices) + out_d2 = 1 + + result_shape = (out_d1, out_d2) + flat_arr = np.array(flat_indices, dtype=np.int32) + return Index(node, flat_arr, result_shape) + + +def _resolve_axis_index(key, length): + if isinstance(key, (int, np.integer)): + idx = int(key) + if idx < 0: + idx += length + if idx < 0 or idx >= length: + raise IndexError(f"Index {key} out of range for axis of length {length}") + return [idx] + if isinstance(key, slice): + return list(range(*key.indices(length))) + if isinstance(key, (list, 
np.ndarray)): + out = [] + for i in key: + idx = int(i) + if idx < 0: + idx += length + if idx < 0 or idx >= length: + raise IndexError(f"Index {i} out of range for axis of length {length}") + out.append(idx) + return out + raise IndexError(f"Unsupported index type: {type(key).__name__}") diff --git a/sparsediffpy/_core/_expression.py b/sparsediffpy/_core/_expression.py index f3321ea..69ea79e 100644 --- a/sparsediffpy/_core/_expression.py +++ b/sparsediffpy/_core/_expression.py @@ -1,5 +1,6 @@ -"""Expression base class, operator overloading, dispatch helpers, and _wrap_constant. +"""Expression base class and _wrap_constant. +Operator dispatch lives in _dispatch.py (avoids circular imports). Node types are defined in _nodes_affine.py, _nodes_elementwise.py, _nodes_bivariate.py, and _nodes_other.py. """ @@ -8,11 +9,6 @@ import scipy.sparse from sparsediffpy._core._constants import Constant, SparseConstant -from sparsediffpy._core._shapes import ( - broadcast_shape, - check_matmul_shapes, - is_scalar, -) # --------------------------------------------------------------------------- @@ -67,56 +63,41 @@ class Expression: __array_priority__ = 20 def __add__(self, other): - other = _wrap_constant(other) - return _make_add(self, other) + return _dispatch.make_add(self, _wrap_constant(other)) def __radd__(self, other): - other = _wrap_constant(other) - return _make_add(other, self) + return _dispatch.make_add(_wrap_constant(other), self) def __sub__(self, other): - other = _wrap_constant(other) - from sparsediffpy._core._nodes_affine import Neg - return _make_add(self, Neg(other)) + return _dispatch.make_sub(self, _wrap_constant(other)) def __rsub__(self, other): - other = _wrap_constant(other) - from sparsediffpy._core._nodes_affine import Neg - return _make_add(other, Neg(self)) + return _dispatch.make_sub(_wrap_constant(other), self) def __neg__(self): - from sparsediffpy._core._nodes_affine import Neg - return Neg(self) + return _dispatch.make_neg(self) def __mul__(self, 
other): - other = _wrap_constant(other) - return _make_mul(self, other) + return _dispatch.make_mul(self, _wrap_constant(other)) def __rmul__(self, other): - other = _wrap_constant(other) - return _make_mul(other, self) + return _dispatch.make_mul(_wrap_constant(other), self) def __matmul__(self, other): - other = _wrap_constant(other) - return _make_matmul(self, other) + return _dispatch.make_matmul(self, _wrap_constant(other)) def __rmatmul__(self, other): - other = _wrap_constant(other) - return _make_matmul(other, self) + return _dispatch.make_matmul(_wrap_constant(other), self) def __pow__(self, exponent): - if not isinstance(exponent, (int, float)): - raise TypeError("Exponent must be a constant number") - from sparsediffpy._core._nodes_elementwise import Power - return Power(self, float(exponent)) + return _dispatch.make_pow(self, exponent) def __getitem__(self, key): - return _make_index(self, key) + return _dispatch.make_index(self, key) @property def T(self): - from sparsediffpy._core._nodes_affine import Transpose - return Transpose(self) + return _dispatch.make_transpose(self) @property def size(self): @@ -147,127 +128,9 @@ def size(self): # --------------------------------------------------------------------------- -# Operator dispatch helpers +# Import _dispatch at the bottom to avoid circular imports. +# By this point Expression is fully defined, so _dispatch.py (which imports +# from _nodes_*.py which inherit from Expression) can resolve everything. 
# --------------------------------------------------------------------------- -def _is_param_like(node): - from sparsediffpy._core._scope import Parameter - return isinstance(node, (Constant, SparseConstant, Parameter)) - - -def _make_add(left, right): - from sparsediffpy._core._nodes_affine import Add, Broadcast - result_shape, left_bc, right_bc = broadcast_shape(left.shape, right.shape) - if left_bc: - left = Broadcast(left, result_shape) - if right_bc: - right = Broadcast(right, result_shape) - return Add(left, right) - - -def _make_mul(left, right): - from sparsediffpy._core._nodes_affine import Broadcast, ParamScalarMult, ParamVectorMult - from sparsediffpy._core._nodes_bivariate import Multiply - from sparsediffpy._core._scope import Parameter - - if _is_param_like(left) and is_scalar(left.shape): - return ParamScalarMult(left, right) - if _is_param_like(right) and is_scalar(right.shape): - return ParamScalarMult(right, left) - - if _is_param_like(left) and left.shape == right.shape: - return ParamVectorMult(left, right) - if _is_param_like(right) and right.shape == left.shape: - return ParamVectorMult(right, left) - - result_shape, left_bc, right_bc = broadcast_shape(left.shape, right.shape) - if left_bc: - left = Broadcast(left, result_shape) - if right_bc: - right = Broadcast(right, result_shape) - - if _is_param_like(left): - return ParamVectorMult(left, right) - if _is_param_like(right): - return ParamVectorMult(right, left) - - return Multiply(left, right) - - -def _make_matmul(left, right): - from sparsediffpy._core._nodes_affine import LeftMatMul, RightMatMul - from sparsediffpy._core._nodes_bivariate import MatMul - from sparsediffpy._core._scope import Parameter - - result_shape = check_matmul_shapes(left.shape, right.shape) - left_is_param = isinstance(left, (Constant, SparseConstant, Parameter)) - right_is_param = isinstance(right, (Constant, SparseConstant, Parameter)) - - if left_is_param and not right_is_param: - return LeftMatMul(left, right, 
result_shape) - if right_is_param and not left_is_param: - return RightMatMul(right, left, result_shape) - return MatMul(left, right, result_shape) - - -def _make_index(node, key): - from sparsediffpy._core._nodes_affine import Index - - d1, d2 = node.shape - - if isinstance(key, tuple): - if len(key) != 2: - raise IndexError("Only 1D or 2D indexing supported") - row_key, col_key = key - row_indices = _resolve_axis_index(row_key, d1) - col_indices = _resolve_axis_index(col_key, d2) - flat_indices = [] - for c in col_indices: - for r in row_indices: - flat_indices.append(r + c * d1) - out_d1 = len(row_indices) - out_d2 = len(col_indices) - else: - if d2 == 1: - indices = _resolve_axis_index(key, d1) - flat_indices = indices - out_d1 = len(indices) - out_d2 = 1 - elif d1 == 1: - indices = _resolve_axis_index(key, d2) - flat_indices = [i * d1 for i in indices] - out_d1 = 1 - out_d2 = len(indices) - else: - total = d1 * d2 - indices = _resolve_axis_index(key, total) - flat_indices = indices - out_d1 = len(indices) - out_d2 = 1 - - result_shape = (out_d1, out_d2) - flat_arr = np.array(flat_indices, dtype=np.int32) - return Index(node, flat_arr, result_shape) - - -def _resolve_axis_index(key, length): - if isinstance(key, (int, np.integer)): - idx = int(key) - if idx < 0: - idx += length - if idx < 0 or idx >= length: - raise IndexError(f"Index {key} out of range for axis of length {length}") - return [idx] - if isinstance(key, slice): - return list(range(*key.indices(length))) - if isinstance(key, (list, np.ndarray)): - out = [] - for i in key: - idx = int(i) - if idx < 0: - idx += length - if idx < 0 or idx >= length: - raise IndexError(f"Index {i} out of range for axis of length {length}") - out.append(idx) - return out - raise IndexError(f"Unsupported index type: {type(key).__name__}") +from sparsediffpy._core import _dispatch # noqa: E402 diff --git a/sparsediffpy/_core/_fn_bivariate.py b/sparsediffpy/_core/_fn_bivariate.py index 4ae3487..ef9e2f8 100644 --- 
a/sparsediffpy/_core/_fn_bivariate.py +++ b/sparsediffpy/_core/_fn_bivariate.py @@ -39,14 +39,18 @@ def quad_form(x, Q): def quad_over_lin(x, z): - """sum(x^2) / z where z is a scalar expression.""" - x = _wrap_constant(x) - z = _wrap_constant(z) + """sum(x^2) / z where z is a scalar variable. + + Both arguments must be variable-dependent expressions. + z must be a plain Variable (not a composition). + """ return QuadOverLin(x, z) def rel_entr(x, y): - """x * log(x / y) elementwise.""" - x = _wrap_constant(x) - y = _wrap_constant(y) + """x * log(x / y) elementwise. + + Both arguments must be variable-dependent expressions. + The C engine does not support constant arguments. + """ return RelEntr(x, y) From d292a9f32ec6ca7137d7693c21bffe4d4ee4e693 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 14 Apr 2026 17:44:56 -0700 Subject: [PATCH 09/17] clean up scope a bit --- sparsediffpy/__init__.py | 2 +- sparsediffpy/_core/_compile.py | 13 +++++- sparsediffpy/_core/_registry.py | 8 +++- sparsediffpy/_core/_scope.py | 74 +++++++++++++------------------- tests/affine/test_left_matmul.py | 6 ++- tests/affine/test_scalar_mult.py | 6 ++- tests/affine/test_vector_mult.py | 6 ++- tests/test_misc.py | 6 ++- tests/test_validation.py | 26 +++++++---- 9 files changed, 83 insertions(+), 64 deletions(-) diff --git a/sparsediffpy/__init__.py b/sparsediffpy/__init__.py index 1d656a3..29480ac 100644 --- a/sparsediffpy/__init__.py +++ b/sparsediffpy/__init__.py @@ -18,7 +18,7 @@ from sparsediffpy import _sparsediffengine # noqa: F401 # Core classes -from sparsediffpy._core._scope import Scope, Variable, Parameter # noqa: F401 +from sparsediffpy._core._scope import Scope, Variable, Parameter, DimensionError # noqa: F401 from sparsediffpy._core._expression import Expression # noqa: F401 # Compile diff --git a/sparsediffpy/_core/_compile.py b/sparsediffpy/_core/_compile.py index 4fb5ded..09f1974 100644 --- a/sparsediffpy/_core/_compile.py +++ b/sparsediffpy/_core/_compile.py @@ -163,9 
+163,12 @@ def _convert_node(node, n_vars, cache, param_caps, param_objs): ) if isinstance(node, Parameter): + # Use current values if set, otherwise zeros as placeholder. + # Real values are synced via problem_update_params before evaluation. + size = node.shape[0] * node.shape[1] + values = node._value_flat if node._value_flat is not None else np.zeros(size) cap = _C.make_parameter( - node.shape[0], node.shape[1], node._param_id, n_vars, - node._value_flat, + node.shape[0], node.shape[1], node._param_id, n_vars, values, ) param_caps.append(cap) param_objs.append(node) @@ -308,6 +311,12 @@ def _sync_params(self): """Push current parameter values to the C problem.""" if not self._param_objects: return + for p in self._param_objects: + if p._value_flat is None: + raise ValueError( + f"Parameter with shape {p.shape} has no value set. " + f"Assign a value via parameter.value = ... before evaluating." + ) theta_parts = [p._value_flat for p in self._param_objects] theta = np.concatenate(theta_parts) _C.problem_update_params(self._problem, theta) diff --git a/sparsediffpy/_core/_registry.py b/sparsediffpy/_core/_registry.py index 1a18de5..b81c141 100644 --- a/sparsediffpy/_core/_registry.py +++ b/sparsediffpy/_core/_registry.py @@ -62,8 +62,14 @@ def make_dense_right_matmul(param_node, child_cap, A_flat, m, n): def _to_dense_row_major(matrix): - """Convert a Constant or Parameter to row-major flat data for dense matmul.""" + """Convert a Constant or Parameter to row-major flat data for dense matmul. + + For Parameters with no value set yet, returns zeros as a placeholder — + the real values are pushed via problem_update_params before evaluation. 
+ """ m, n = matrix.shape + if matrix._value_flat is None: + return np.zeros(m * n, dtype=np.float64) return matrix._value_flat.reshape((m, n), order="F").flatten(order="C") diff --git a/sparsediffpy/_core/_scope.py b/sparsediffpy/_core/_scope.py index 86339e6..c64be8b 100644 --- a/sparsediffpy/_core/_scope.py +++ b/sparsediffpy/_core/_scope.py @@ -6,6 +6,11 @@ from sparsediffpy._core._shapes import validate_shape +class DimensionError(ValueError): + """Raised when a value has the wrong number of elements.""" + pass + + class Variable(Expression): """A decision variable in the expression tree. @@ -20,61 +25,41 @@ def __init__(self, scope, var_id, shape): @property def value(self): - size = self.shape[0] * self.shape[1] - return self._scope._flat_values[self._var_id:self._var_id + size].copy() + return self._scope._flat_values[self._var_id:self._var_id + self.size].copy() @value.setter def value(self, val): val = np.asarray(val, dtype=np.float64).ravel() - size = self.shape[0] * self.shape[1] - if val.size != size: - raise ValueError( - f"Expected {size} elements for Variable with shape {self.shape}, " - f"got {val.size}" - ) - self._scope._flat_values[self._var_id:self._var_id + size] = val + if val.size != self.size: + raise DimensionError(f"expected {self.size} elements, got {val.size}") + self._scope._flat_values[self._var_id:self._var_id + self.size] = val class Parameter(Expression): """An updatable parameter in the expression tree. - Created by Scope.Parameter(). Values are stored on the parameter itself - (not in the scope's flat buffer). Updated via .value property. + Created by Scope.Parameter(). Values must be set via .value before + evaluating any expression that uses this parameter. 
""" - def __init__(self, scope, param_id, shape, value=None): + def __init__(self, scope, param_id, shape): self._scope = scope self._param_id = param_id self.shape = shape - size = shape[0] * shape[1] - if value is not None: - self._value_flat = np.asarray(value, dtype=np.float64).ravel(order="F") - if self._value_flat.size != size: - raise ValueError( - f"Parameter value has {self._value_flat.size} elements, " - f"expected {size} for shape {shape}" - ) - else: - self._value_flat = np.zeros(size, dtype=np.float64) + self._value_flat = None @property def value(self): + if self._value_flat is None: + return None return self._value_flat.copy() @value.setter def value(self, val): val = np.asarray(val, dtype=np.float64).ravel(order="F") - size = self.shape[0] * self.shape[1] - if val.size != size: - raise ValueError( - f"Expected {size} elements for Parameter with shape {self.shape}, " - f"got {val.size}" - ) - self._value_flat[:] = val - - -# Patch _is_param_like to recognize Parameter -# (already handled via lazy import in _expressions.py) + if val.size != self.size: + raise DimensionError(f"expected {self.size} elements, got {val.size}") + self._value_flat = val.copy() class Scope: @@ -103,26 +88,27 @@ def Variable(self, d1, d2): self._variables.append(var) return var - def Parameter(self, d1, d2, value=None): - """Create a new updatable parameter in this scope.""" + def Parameter(self, d1, d2): + """Create a new updatable parameter in this scope. + + Set its value via .value = ... before evaluating. 
+ """ validate_shape(d1, d2) size = d1 * d2 param_id = self._next_param_offset self._next_param_offset += size - param = Parameter(self, param_id, (d1, d2), value) + param = Parameter(self, param_id, (d1, d2)) self._parameters.append(param) return param - def set_values(self, flat_array): + def set_values(self, array): """Set all variable values at once from a flat array.""" - flat_array = np.asarray(flat_array, dtype=np.float64) - if flat_array.size != self._flat_values.size: - raise ValueError( - f"Expected flat array of size {self._flat_values.size}, " - f"got {flat_array.size}" - ) - self._flat_values[:] = flat_array + array = np.asarray(array, dtype=np.float64) + in_size = self._flat_values.size + if array.size != in_size: + raise DimensionError(f"expected {in_size} elements, got {array.size}") + self._flat_values[:] = array def get_values(self): """Return a copy of the flat value buffer.""" diff --git a/tests/affine/test_left_matmul.py b/tests/affine/test_left_matmul.py index 3c72164..4c2b9ee 100644 --- a/tests/affine/test_left_matmul.py +++ b/tests/affine/test_left_matmul.py @@ -42,7 +42,8 @@ def test_left_matmul_sparse_forward(scope, rng): def test_left_matmul_parameter_jacobian(scope, rng): x = scope.Variable(3, 1) - A = scope.Parameter(4, 3, value=rng.standard_normal((4, 3))) + A = scope.Parameter(4, 3) + A.value = rng.standard_normal((4, 3)) f = A @ x fn = sp.compile(f) checker = NumericalDerivativeChecker(fn, scope) @@ -51,7 +52,8 @@ def test_left_matmul_parameter_jacobian(scope, rng): def test_left_matmul_parameter_update(scope, rng): x = scope.Variable(3, 1) - A = scope.Parameter(3, 3, value=np.eye(3)) + A = scope.Parameter(3, 3) + A.value = np.eye(3) f = A @ x fn = sp.compile(f) x0 = random_point(scope, rng) diff --git a/tests/affine/test_scalar_mult.py b/tests/affine/test_scalar_mult.py index ed19a92..a17ac97 100644 --- a/tests/affine/test_scalar_mult.py +++ b/tests/affine/test_scalar_mult.py @@ -21,7 +21,8 @@ def 
test_scalar_mult_constant_forward(scope, rng): def test_scalar_mult_parameter_jacobian(scope, rng): x = scope.Variable(4, 1) - a = scope.Parameter(1, 1, value=np.array([[3.0]])) + a = scope.Parameter(1, 1) + a.value = np.array([[3.0]]) f = a * x fn = sp.compile(f) checker = NumericalDerivativeChecker(fn, scope) @@ -30,7 +31,8 @@ def test_scalar_mult_parameter_jacobian(scope, rng): def test_scalar_mult_parameter_update(scope, rng): x = scope.Variable(3, 1) - a = scope.Parameter(1, 1, value=np.array([[2.0]])) + a = scope.Parameter(1, 1) + a.value = np.array([[2.0]]) f = a * x fn = sp.compile(f) x0 = random_point(scope, rng) diff --git a/tests/affine/test_vector_mult.py b/tests/affine/test_vector_mult.py index abdca11..8a57278 100644 --- a/tests/affine/test_vector_mult.py +++ b/tests/affine/test_vector_mult.py @@ -23,7 +23,8 @@ def test_vector_mult_constant_forward(scope, rng): def test_vector_mult_parameter_jacobian(scope, rng): x = scope.Variable(3, 1) - a = scope.Parameter(3, 1, value=np.array([1.0, 2.0, 3.0])) + a = scope.Parameter(3, 1) + a.value = np.array([1.0, 2.0, 3.0]) f = a * x fn = sp.compile(f) checker = NumericalDerivativeChecker(fn, scope) @@ -32,7 +33,8 @@ def test_vector_mult_parameter_jacobian(scope, rng): def test_vector_mult_parameter_update(scope, rng): x = scope.Variable(3, 1) - a = scope.Parameter(3, 1, value=np.array([1.0, 1.0, 1.0])) + a = scope.Parameter(3, 1) + a.value = np.array([1.0, 1.0, 1.0]) f = a * x fn = sp.compile(f) x0 = random_point(scope, rng) diff --git a/tests/test_misc.py b/tests/test_misc.py index 89814bb..0e87139 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -149,7 +149,8 @@ def test_negative_fancy(self, scope, rng): class TestParameterJacobianAfterUpdate: def test_left_matmul_jacobian_after_update(self, scope, rng): x = scope.Variable(3, 1) - A = scope.Parameter(3, 3, value=np.eye(3)) + A = scope.Parameter(3, 3) + A.value = np.eye(3) f = A @ x fn = sp.compile(f) @@ -163,7 +164,8 @@ def 
test_left_matmul_jacobian_after_update(self, scope, rng): def test_scalar_mult_jacobian_after_update(self, scope, rng): x = scope.Variable(3, 1) - a = scope.Parameter(1, 1, value=np.array([[3.0]])) + a = scope.Parameter(1, 1) + a.value = np.array([[3.0]]) f = a * x fn = sp.compile(f) diff --git a/tests/test_validation.py b/tests/test_validation.py index 9d3068b..c46f8d9 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -176,27 +176,37 @@ def test_prod_axis_must_be_variable(self, scope): class TestValueAssignment: def test_variable_wrong_size(self, scope): x = scope.Variable(3, 1) - with pytest.raises(ValueError, match="Expected 3"): + with pytest.raises(ValueError, match="expected 3 elements"): x.value = np.array([1.0, 2.0]) def test_variable_too_many(self, scope): x = scope.Variable(3, 1) - with pytest.raises(ValueError, match="Expected 3"): + with pytest.raises(ValueError, match="expected 3 elements"): x.value = np.array([1.0, 2.0, 3.0, 4.0]) def test_parameter_wrong_size(self, scope): - p = scope.Parameter(2, 2, value=np.eye(2)) - with pytest.raises(ValueError, match="Expected 4"): + p = scope.Parameter(2, 2) + p.value = np.eye(2) + with pytest.raises(ValueError, match="expected 4 elements"): p.value = np.array([1.0, 2.0]) def test_scope_set_values_wrong_size(self, scope): x = scope.Variable(3, 1) - with pytest.raises(ValueError, match="Expected flat array of size 3"): + with pytest.raises(ValueError, match="expected 3 elements"): scope.set_values(np.array([1.0, 2.0])) - def test_parameter_init_wrong_size(self, scope): - with pytest.raises(ValueError, match="elements"): - scope.Parameter(2, 2, value=np.array([1.0, 2.0])) + def test_parameter_unset_value_is_none(self, scope): + p = scope.Parameter(2, 2) + assert p.value is None + + def test_parameter_unset_raises_on_eval(self, scope): + x = scope.Variable(3, 1) + A = scope.Parameter(3, 3) + f = A @ x + fn = sp.compile(f) + x.value = np.array([1.0, 2.0, 3.0]) + with pytest.raises(ValueError, 
match="has no value set"): + fn.forward() # --------------------------------------------------------------------------- From ec8926087c2d4f149010cd82033c97c0bf7fcc55 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 14 Apr 2026 18:59:01 -0700 Subject: [PATCH 10/17] simplify things --- sparsediffpy/_core/_fn_affine.py | 24 +++++++++--------- sparsediffpy/_core/_fn_bivariate.py | 33 ++++++------------------ sparsediffpy/_core/_nodes_bivariate.py | 19 +++++++------- sparsediffpy/_core/_nodes_other.py | 9 +++---- sparsediffpy/_core/_registry.py | 15 ++++++----- tests/test_validation.py | 35 +++++++++++++------------- 6 files changed, 56 insertions(+), 79 deletions(-) diff --git a/sparsediffpy/_core/_fn_affine.py b/sparsediffpy/_core/_fn_affine.py index 2ee0ab6..901dbfe 100644 --- a/sparsediffpy/_core/_fn_affine.py +++ b/sparsediffpy/_core/_fn_affine.py @@ -30,8 +30,14 @@ def sum(x, axis=None): axis=0: sum along rows (collapse d1) -> (1, d2) axis=1: sum along columns (collapse d2) -> (1, d1) """ - c_axis = -1 if axis is None else axis - return Sum(x, c_axis) + if axis is None: + return Sum(x, -1) + elif axis == 0: + return Sum(x, 0) + elif axis == 1: + return Sum(x, 1) + else: + raise ValueError(f"Invalid axis {axis}, must be None, 0, or 1") def prod(x, axis=None): @@ -58,15 +64,12 @@ def hstack(expressions): """ exprs = [_wrap_constant(e) for e in expressions] if not exprs: - raise ValueError("hstack requires at least one expression") + raise ValueError("hstack: empty argument") d1 = exprs[0].shape[0] for e in exprs[1:]: if e.shape[0] != d1: - raise ValueError( - f"hstack: all expressions must have the same number of rows, " - f"got {d1} and {e.shape[0]}" - ) + raise ValueError(f"hstack: row mismatch, {d1} vs {e.shape[0]}") total_d2 = _builtin_sum(e.shape[1] for e in exprs) return HStack(exprs, (d1, total_d2)) @@ -79,15 +82,12 @@ def vstack(expressions): """ exprs = [_wrap_constant(e) for e in expressions] if not exprs: - raise ValueError("vstack requires at least one 
expression") + raise ValueError("vstack: empty argument") d2 = exprs[0].shape[1] for e in exprs[1:]: if e.shape[1] != d2: - raise ValueError( - f"vstack: all expressions must have the same number of columns, " - f"got {d2} and {e.shape[1]}" - ) + raise ValueError(f"vstack: column mismatch, {d2} vs {e.shape[1]}") transposed = [Transpose(e) for e in exprs] total_d1 = _builtin_sum(e.shape[0] for e in exprs) diff --git a/sparsediffpy/_core/_fn_bivariate.py b/sparsediffpy/_core/_fn_bivariate.py index ef9e2f8..480d839 100644 --- a/sparsediffpy/_core/_fn_bivariate.py +++ b/sparsediffpy/_core/_fn_bivariate.py @@ -1,41 +1,22 @@ """Bivariate named functions: sp.quad_form, sp.quad_over_lin, sp.rel_entr.""" -import numpy as np import scipy.sparse -from sparsediffpy._core._expression import _wrap_constant from sparsediffpy._core._nodes_bivariate import QuadOverLin, RelEntr from sparsediffpy._core._nodes_other import QuadForm def quad_form(x, Q): - """Quadratic form x' Q x. - - x must be a column vector (n, 1). - Q must be a scipy.sparse matrix or np.ndarray of shape (n, n). 
- """ - x = _wrap_constant(x) - if x.shape[1] != 1: - raise ValueError(f"quad_form: x must be a column vector, got shape {x.shape}") - - if not scipy.sparse.issparse(Q): + """Quadratic form xT Q x with x (n, 1) and Q (n, n)""" + if not isinstance(Q, scipy.sparse.csr_matrix): Q = scipy.sparse.csr_matrix(Q) - else: - Q = Q.tocsr() n = x.shape[0] - if Q.shape != (n, n): - raise ValueError( - f"quad_form: Q shape {Q.shape} doesn't match x shape {x.shape}" - ) - - return QuadForm( - x, - Q_csr_data=np.asarray(Q.data, dtype=np.float64), - Q_csr_indices=np.asarray(Q.indices, dtype=np.int32), - Q_csr_indptr=np.asarray(Q.indptr, dtype=np.int32), - Q_shape=Q.shape, - ) + if x.shape[1] != 1 or Q.shape != (n, n): + raise ValueError(f"quad_form: need x (n, 1) and Q (n, n) " + ", got x {x.shape} and Q {Q.shape}") + + return QuadForm(x, Q) def quad_over_lin(x, z): diff --git a/sparsediffpy/_core/_nodes_bivariate.py b/sparsediffpy/_core/_nodes_bivariate.py index 516cce1..7b38837 100644 --- a/sparsediffpy/_core/_nodes_bivariate.py +++ b/sparsediffpy/_core/_nodes_bivariate.py @@ -36,24 +36,25 @@ def __init__(self, left, right, result_shape): class QuadOverLin(Expression): - """sum(x^2) / z where z is a scalar variable. + """sum(x^2) / z where both x and z are plain Variables. - z must be a plain Variable and must not appear in x. + z must be scalar and must not appear in x. """ def __init__(self, x, z): - if not is_scalar(z.shape): - raise ValueError(f"quad_over_lin: z must be scalar, got shape {z.shape}") from sparsediffpy._core._scope import Variable + if not isinstance(x, Variable): + raise ValueError( + "quad_over_lin: x (first argument) must be a plain Variable." + ) if not isinstance(z, Variable): raise ValueError( - "quad_over_lin: z (second argument) must be a plain Variable. " - "The C engine requires this — compositions like quad_over_lin(x, f(y)) " - "are not supported." + "quad_over_lin: z (second argument) must be a plain scalar Variable." 
) + if not is_scalar(z.shape): + raise ValueError(f"quad_over_lin: z must be scalar, got {z.shape}") if _expr_contains_variable(x, z): raise ValueError( - "quad_over_lin: the denominator variable z must not appear in " - "the numerator x. Use separate variables for numerator and denominator." + "quad_over_lin: z must not appear in x." ) self.x = x self.z = z diff --git a/sparsediffpy/_core/_nodes_other.py b/sparsediffpy/_core/_nodes_other.py index 6eccfed..2365b03 100644 --- a/sparsediffpy/_core/_nodes_other.py +++ b/sparsediffpy/_core/_nodes_other.py @@ -4,13 +4,10 @@ class QuadForm(Expression): - """x' Q x where Q is a constant sparse matrix.""" - def __init__(self, child, Q_csr_data, Q_csr_indices, Q_csr_indptr, Q_shape): + """x' Q x where Q is a constant CSR sparse matrix.""" + def __init__(self, child, Q): self.child = child - self.Q_csr_data = Q_csr_data - self.Q_csr_indices = Q_csr_indices - self.Q_csr_indptr = Q_csr_indptr - self.Q_shape = Q_shape + self.Q = Q self.shape = (1, 1) diff --git a/sparsediffpy/_core/_registry.py b/sparsediffpy/_core/_registry.py index b81c141..59a7c70 100644 --- a/sparsediffpy/_core/_registry.py +++ b/sparsediffpy/_core/_registry.py @@ -10,10 +10,8 @@ import numpy as np from sparsediffpy import _sparsediffengine as _C -from sparsediffpy._core._constants import Constant, SparseConstant from sparsediffpy._core._nodes_affine import ( - Add, Broadcast, DiagVec, HStack, Index, LeftMatMul, Neg, - ParamScalarMult, ParamVectorMult, Reshape, RightMatMul, Sum, Trace, + Add, Broadcast, DiagVec, HStack, Index, Neg, Reshape, Sum, Trace, Transpose, ) from sparsediffpy._core._nodes_bivariate import ( @@ -26,7 +24,6 @@ from sparsediffpy._core._nodes_other import ( Prod, ProdAxisOne, ProdAxisZero, QuadForm, ) -from sparsediffpy._core._scope import Parameter # --------------------------------------------------------------------------- @@ -38,8 +35,7 @@ def make_sparse_left_matmul(param_node, child_cap, matrix): return _C.make_left_matmul( 
param_node, child_cap, "sparse", matrix._csr_data, matrix._csr_indices, matrix._csr_indptr, - matrix.shape[0], matrix.shape[1], - ) + matrix.shape[0], matrix.shape[1]) def make_dense_left_matmul(param_node, child_cap, A_flat, m, n): @@ -112,10 +108,13 @@ def convert_rel_entr(node, child_caps): def convert_quad_form(node, child_caps): + Q = node.Q return _C.make_quad_form( child_caps[0], - node.Q_csr_data, node.Q_csr_indices, node.Q_csr_indptr, - node.Q_shape[0], node.Q_shape[1], + np.asarray(Q.data, dtype=np.float64), + np.asarray(Q.indices, dtype=np.int32), + np.asarray(Q.indptr, dtype=np.int32), + Q.shape[0], Q.shape[1], ) diff --git a/tests/test_validation.py b/tests/test_validation.py index c46f8d9..77c680b 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -82,13 +82,13 @@ def test_reshape_size_mismatch(self, scope): def test_quad_form_wrong_Q_size(self, scope): x = scope.Variable(3, 1) Q = np.eye(4) - with pytest.raises(ValueError, match="doesn't match"): + with pytest.raises(ValueError, match="need x"): sp.quad_form(x, Q) def test_quad_form_non_column(self, scope): x = scope.Variable(1, 3) Q = np.eye(3) - with pytest.raises(ValueError, match="column vector"): + with pytest.raises(ValueError, match="need x"): sp.quad_form(x, Q) def test_pow_non_numeric_exponent(self, scope): @@ -97,23 +97,23 @@ def test_pow_non_numeric_exponent(self, scope): x ** "two" def test_hstack_empty(self): - with pytest.raises(ValueError, match="at least one"): + with pytest.raises(ValueError, match="empty argument"): sp.hstack([]) def test_vstack_empty(self): - with pytest.raises(ValueError, match="at least one"): + with pytest.raises(ValueError, match="empty argument"): sp.vstack([]) def test_hstack_mismatched_rows(self, scope): x = scope.Variable(3, 1) y = scope.Variable(2, 1) - with pytest.raises(ValueError, match="same number of rows"): + with pytest.raises(ValueError, match="row mismatch"): sp.hstack([x, y]) def test_vstack_mismatched_cols(self, scope): X = 
scope.Variable(3, 2) Y = scope.Variable(3, 3) - with pytest.raises(ValueError, match="same number of columns"): + with pytest.raises(ValueError, match="column mismatch"): sp.vstack([X, Y]) def test_restricted_domain_on_index(self, scope): @@ -130,25 +130,24 @@ def test_restricted_domain_on_index(self, scope): with pytest.raises(ValueError, match="cannot be applied directly"): sp.entr(x[1:3]) - def test_quad_over_lin_z_must_be_variable(self, scope): + def test_quad_over_lin_args_must_be_variables(self, scope): x = scope.Variable(3, 1) z = scope.Variable(1, 1) - # This should work — z is a plain variable, not in numerator + # This should work — both are plain variables sp.quad_over_lin(x, z) - # This should fail — z is an expression, not a plain variable - with pytest.raises(ValueError, match="must be a plain Variable"): + # x is a composition — fails + with pytest.raises(ValueError, match="x.*must be a plain Variable"): + sp.quad_over_lin(sp.sin(x), z) + + # z is a composition — fails + with pytest.raises(ValueError, match="z.*must be a plain"): sp.quad_over_lin(x, sp.exp(z)) - def test_quad_over_lin_z_not_in_numerator(self, scope): - x = scope.Variable(3, 1) + def test_quad_over_lin_z_not_in_x(self, scope): z = scope.Variable(1, 1) - # z appears in numerator via broadcast: x + z - with pytest.raises(ValueError, match="denominator variable z must not appear in the numerator"): - sp.quad_over_lin(x + z, z) - - # z appears directly as scalar in numerator - with pytest.raises(ValueError, match="denominator variable z must not appear in the numerator"): + # z used as both args + with pytest.raises(ValueError, match="z must not appear in x"): sp.quad_over_lin(z, z) def test_prod_must_be_variable(self, scope): From 15bea5b7bbf0d4a1f9e68e1c283c3155e8875ad0 Mon Sep 17 00:00:00 2001 From: dance858 Date: Tue, 14 Apr 2026 19:08:02 -0700 Subject: [PATCH 11/17] more simplifications --- sparsediffpy/_core/_dispatch.py | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 
14 deletions(-) diff --git a/sparsediffpy/_core/_dispatch.py b/sparsediffpy/_core/_dispatch.py index 70c34b2..8267803 100644 --- a/sparsediffpy/_core/_dispatch.py +++ b/sparsediffpy/_core/_dispatch.py @@ -69,9 +69,9 @@ def make_mul(left, right): def make_matmul(left, right): result_shape = check_matmul_shapes(left.shape, right.shape) - if _is_param_like(left) and not _is_param_like(right): + if _is_param_like(left): return LeftMatMul(left, right, result_shape) - if _is_param_like(right) and not _is_param_like(left): + if _is_param_like(right): return RightMatMul(right, left, result_shape) return MatMul(left, right, result_shape) @@ -103,25 +103,20 @@ def make_index(node, key): out_d2 = len(col_indices) else: if d2 == 1: - indices = _resolve_axis_index(key, d1) - flat_indices = indices - out_d1 = len(indices) + flat_indices = _resolve_axis_index(key, d1) + out_d1 = len(flat_indices) out_d2 = 1 elif d1 == 1: - indices = _resolve_axis_index(key, d2) - flat_indices = [i * d1 for i in indices] + flat_indices = _resolve_axis_index(key, d2) out_d1 = 1 - out_d2 = len(indices) + out_d2 = len(flat_indices) else: - total = d1 * d2 - indices = _resolve_axis_index(key, total) - flat_indices = indices - out_d1 = len(indices) + flat_indices = _resolve_axis_index(key, d1 * d2) + out_d1 = len(flat_indices) out_d2 = 1 - result_shape = (out_d1, out_d2) flat_arr = np.array(flat_indices, dtype=np.int32) - return Index(node, flat_arr, result_shape) + return Index(node, flat_arr, (out_d1, out_d2)) def _resolve_axis_index(key, length): From cddc744018e003a6581178af89e02ae89dc96aa3 Mon Sep 17 00:00:00 2001 From: dance858 Date: Wed, 15 Apr 2026 09:13:25 -0700 Subject: [PATCH 12/17] add expression specificc bindings and also clean up more --- SparseDiffEngine | 2 +- sparsediffpy/_bindings/bindings.c | 19 +++ sparsediffpy/_bindings/expression/forward.h | 45 ++++++ sparsediffpy/_bindings/expression/hessian.h | 56 +++++++ .../_bindings/expression/init_derivatives.h | 46 ++++++ 
sparsediffpy/_bindings/expression/jacobian.h | 47 ++++++ .../_bindings/expression/update_params.h | 72 +++++++++ sparsediffpy/_core/_compile.py | 149 +++++++----------- sparsediffpy/_core/_nodes_affine.py | 16 +- sparsediffpy/_core/_nodes_bivariate.py | 8 +- tests/test_misc.py | 9 +- 11 files changed, 360 insertions(+), 109 deletions(-) mode change 160000 => 120000 SparseDiffEngine create mode 100644 sparsediffpy/_bindings/expression/forward.h create mode 100644 sparsediffpy/_bindings/expression/hessian.h create mode 100644 sparsediffpy/_bindings/expression/init_derivatives.h create mode 100644 sparsediffpy/_bindings/expression/jacobian.h create mode 100644 sparsediffpy/_bindings/expression/update_params.h diff --git a/SparseDiffEngine b/SparseDiffEngine deleted file mode 160000 index bcdb0f0..0000000 --- a/SparseDiffEngine +++ /dev/null @@ -1 +0,0 @@ -Subproject commit bcdb0f0e74670b80b0f60b7ff02338dfa325fdf0 diff --git a/SparseDiffEngine b/SparseDiffEngine new file mode 120000 index 0000000..684f0b6 --- /dev/null +++ b/SparseDiffEngine @@ -0,0 +1 @@ +/Users/daniel/Documents/software/NLP/SparseDiff/SparseDiffEngine \ No newline at end of file diff --git a/sparsediffpy/_bindings/bindings.c b/sparsediffpy/_bindings/bindings.c index f9b1ac9..77ca0db 100644 --- a/sparsediffpy/_bindings/bindings.c +++ b/sparsediffpy/_bindings/bindings.c @@ -44,6 +44,13 @@ #include "atoms/vector_mult.h" #include "atoms/xexp.h" +/* Include expression-level bindings */ +#include "expression/forward.h" +#include "expression/hessian.h" +#include "expression/init_derivatives.h" +#include "expression/jacobian.h" +#include "expression/update_params.h" + /* Include problem bindings */ #include "problem/constraint_forward.h" #include "problem/gradient.h" @@ -125,6 +132,18 @@ static PyMethodDef DNLPMethods[] = { {"get_expr_size", py_get_expr_size, METH_VARARGS, "Get the total size of an expression"}, {"make_reshape", py_make_reshape, METH_VARARGS, "Create reshape atom"}, + {"expr_forward", 
py_expr_forward, METH_VARARGS, + "Evaluate expression forward pass"}, + {"expr_init_jacobian", py_expr_init_jacobian, METH_VARARGS, + "Initialize Jacobian sparsity for expression"}, + {"expr_init_hessian", py_expr_init_hessian, METH_VARARGS, + "Initialize Hessian sparsity for expression"}, + {"expr_jacobian", py_expr_jacobian, METH_VARARGS, + "Evaluate expression Jacobian"}, + {"expr_hessian", py_expr_hessian, METH_VARARGS, + "Evaluate expression weighted-sum Hessian"}, + {"expr_update_params", py_expr_update_params, METH_VARARGS, + "Update parameter values and propagate refresh flag"}, {"make_problem", py_make_problem, METH_VARARGS, "Create problem from objective and constraints"}, {"problem_init_derivatives", py_problem_init_derivatives, METH_VARARGS, diff --git a/sparsediffpy/_bindings/expression/forward.h b/sparsediffpy/_bindings/expression/forward.h new file mode 100644 index 0000000..b897ca2 --- /dev/null +++ b/sparsediffpy/_bindings/expression/forward.h @@ -0,0 +1,45 @@ +#ifndef EXPR_FORWARD_H +#define EXPR_FORWARD_H + +#include "../atoms/common.h" + +static PyObject *py_expr_forward(PyObject *self, PyObject *args) +{ + PyObject *expr_capsule; + PyObject *u_obj; + + if (!PyArg_ParseTuple(args, "OO", &expr_capsule, &u_obj)) + { + return NULL; + } + + expr *node = (expr *) PyCapsule_GetPointer(expr_capsule, EXPR_CAPSULE_NAME); + if (!node) + { + PyErr_SetString(PyExc_ValueError, "invalid expression capsule"); + return NULL; + } + + PyArrayObject *u_array = + (PyArrayObject *) PyArray_FROM_OTF(u_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + if (!u_array) + { + return NULL; + } + + node->forward(node, (const double *) PyArray_DATA(u_array)); + Py_DECREF(u_array); + + npy_intp size = node->size; + PyObject *out = PyArray_SimpleNew(1, &size, NPY_DOUBLE); + if (!out) + { + return NULL; + } + memcpy(PyArray_DATA((PyArrayObject *) out), node->value, + size * sizeof(double)); + + return out; +} + +#endif /* EXPR_FORWARD_H */ diff --git 
a/sparsediffpy/_bindings/expression/hessian.h b/sparsediffpy/_bindings/expression/hessian.h new file mode 100644 index 0000000..7b12c0a --- /dev/null +++ b/sparsediffpy/_bindings/expression/hessian.h @@ -0,0 +1,56 @@ +#ifndef EXPR_HESSIAN_H +#define EXPR_HESSIAN_H + +#include "../atoms/common.h" + +static PyObject *py_expr_hessian(PyObject *self, PyObject *args) +{ + PyObject *expr_capsule; + PyObject *weights_obj; + + if (!PyArg_ParseTuple(args, "OO", &expr_capsule, &weights_obj)) + { + return NULL; + } + + expr *node = (expr *) PyCapsule_GetPointer(expr_capsule, EXPR_CAPSULE_NAME); + if (!node) + { + PyErr_SetString(PyExc_ValueError, "invalid expression capsule"); + return NULL; + } + + PyArrayObject *weights_arr = (PyArrayObject *) PyArray_FROM_OTF( + weights_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + if (!weights_arr) + { + return NULL; + } + + node->eval_wsum_hess(node, (const double *) PyArray_DATA(weights_arr)); + Py_DECREF(weights_arr); + + CSR_Matrix *H = node->wsum_hess; + npy_intp nnz = H->nnz; + npy_intp n_plus_1 = H->n + 1; + + PyObject *data = PyArray_SimpleNew(1, &nnz, NPY_DOUBLE); + PyObject *indices = PyArray_SimpleNew(1, &nnz, NPY_INT32); + PyObject *indptr = PyArray_SimpleNew(1, &n_plus_1, NPY_INT32); + + if (!data || !indices || !indptr) + { + Py_XDECREF(data); + Py_XDECREF(indices); + Py_XDECREF(indptr); + return NULL; + } + + memcpy(PyArray_DATA((PyArrayObject *) data), H->x, nnz * sizeof(double)); + memcpy(PyArray_DATA((PyArrayObject *) indices), H->i, nnz * sizeof(int)); + memcpy(PyArray_DATA((PyArrayObject *) indptr), H->p, n_plus_1 * sizeof(int)); + + return Py_BuildValue("(NNN(ii))", data, indices, indptr, H->m, H->n); +} + +#endif /* EXPR_HESSIAN_H */ diff --git a/sparsediffpy/_bindings/expression/init_derivatives.h b/sparsediffpy/_bindings/expression/init_derivatives.h new file mode 100644 index 0000000..57b7dda --- /dev/null +++ b/sparsediffpy/_bindings/expression/init_derivatives.h @@ -0,0 +1,46 @@ +#ifndef EXPR_INIT_DERIVATIVES_H
+#define EXPR_INIT_DERIVATIVES_H + +#include "../atoms/common.h" + +static PyObject *py_expr_init_jacobian(PyObject *self, PyObject *args) +{ + PyObject *expr_capsule; + + if (!PyArg_ParseTuple(args, "O", &expr_capsule)) + { + return NULL; + } + + expr *node = (expr *) PyCapsule_GetPointer(expr_capsule, EXPR_CAPSULE_NAME); + if (!node) + { + PyErr_SetString(PyExc_ValueError, "invalid expression capsule"); + return NULL; + } + + jacobian_init(node); + Py_RETURN_NONE; +} + +static PyObject *py_expr_init_hessian(PyObject *self, PyObject *args) +{ + PyObject *expr_capsule; + + if (!PyArg_ParseTuple(args, "O", &expr_capsule)) + { + return NULL; + } + + expr *node = (expr *) PyCapsule_GetPointer(expr_capsule, EXPR_CAPSULE_NAME); + if (!node) + { + PyErr_SetString(PyExc_ValueError, "invalid expression capsule"); + return NULL; + } + + wsum_hess_init(node); + Py_RETURN_NONE; +} + +#endif /* EXPR_INIT_DERIVATIVES_H */ diff --git a/sparsediffpy/_bindings/expression/jacobian.h b/sparsediffpy/_bindings/expression/jacobian.h new file mode 100644 index 0000000..e01f914 --- /dev/null +++ b/sparsediffpy/_bindings/expression/jacobian.h @@ -0,0 +1,47 @@ +#ifndef EXPR_JACOBIAN_H +#define EXPR_JACOBIAN_H + +#include "../atoms/common.h" + +static PyObject *py_expr_jacobian(PyObject *self, PyObject *args) +{ + PyObject *expr_capsule; + + if (!PyArg_ParseTuple(args, "O", &expr_capsule)) + { + return NULL; + } + + expr *node = (expr *) PyCapsule_GetPointer(expr_capsule, EXPR_CAPSULE_NAME); + if (!node) + { + PyErr_SetString(PyExc_ValueError, "invalid expression capsule"); + return NULL; + } + + node->eval_jacobian(node); + + CSR_Matrix *jac = node->jacobian; + npy_intp nnz = jac->nnz; + npy_intp m_plus_1 = jac->m + 1; + + PyObject *data = PyArray_SimpleNew(1, &nnz, NPY_DOUBLE); + PyObject *indices = PyArray_SimpleNew(1, &nnz, NPY_INT32); + PyObject *indptr = PyArray_SimpleNew(1, &m_plus_1, NPY_INT32); + + if (!data || !indices || !indptr) + { + Py_XDECREF(data); + Py_XDECREF(indices); + 
Py_XDECREF(indptr); + return NULL; + } + + memcpy(PyArray_DATA((PyArrayObject *) data), jac->x, nnz * sizeof(double)); + memcpy(PyArray_DATA((PyArrayObject *) indices), jac->i, nnz * sizeof(int)); + memcpy(PyArray_DATA((PyArrayObject *) indptr), jac->p, m_plus_1 * sizeof(int)); + + return Py_BuildValue("(NNN(ii))", data, indices, indptr, jac->m, jac->n); +} + +#endif /* EXPR_JACOBIAN_H */ diff --git a/sparsediffpy/_bindings/expression/update_params.h b/sparsediffpy/_bindings/expression/update_params.h new file mode 100644 index 0000000..17f5d9e --- /dev/null +++ b/sparsediffpy/_bindings/expression/update_params.h @@ -0,0 +1,72 @@ +#ifndef EXPR_UPDATE_PARAMS_H +#define EXPR_UPDATE_PARAMS_H + +#include "../atoms/common.h" +#include "subexpr.h" + +/* + * py_expr_update_params(root_capsule, param_capsule_list, theta_array) + * + * Updates parameter values from theta and propagates the refresh flag + * down the expression tree rooted at root_capsule. + */ +static PyObject *py_expr_update_params(PyObject *self, PyObject *args) +{ + PyObject *root_capsule; + PyObject *param_list; + PyObject *theta_obj; + + if (!PyArg_ParseTuple(args, "OOO", &root_capsule, &param_list, &theta_obj)) + { + return NULL; + } + + expr *root = (expr *) PyCapsule_GetPointer(root_capsule, EXPR_CAPSULE_NAME); + if (!root) + { + PyErr_SetString(PyExc_ValueError, "invalid root expression capsule"); + return NULL; + } + + if (!PyList_Check(param_list)) + { + PyErr_SetString(PyExc_TypeError, + "second argument must be a list of parameter capsules"); + return NULL; + } + + PyArrayObject *theta_arr = (PyArrayObject *) PyArray_FROM_OTF( + theta_obj, NPY_DOUBLE, NPY_ARRAY_IN_ARRAY); + if (!theta_arr) + { + return NULL; + } + + const double *theta = (const double *) PyArray_DATA(theta_arr); + Py_ssize_t n = PyList_Size(param_list); + + for (Py_ssize_t i = 0; i < n; i++) + { + PyObject *capsule = PyList_GetItem(param_list, i); + expr *pnode = (expr *) PyCapsule_GetPointer(capsule, EXPR_CAPSULE_NAME); + if
(!pnode) + { + Py_DECREF(theta_arr); + PyErr_SetString(PyExc_ValueError, + "invalid parameter capsule in list"); + return NULL; + } + + parameter_expr *param = (parameter_expr *) pnode; + int offset = param->param_id; + memcpy(pnode->value, theta + offset, pnode->size * sizeof(double)); + } + + Py_DECREF(theta_arr); + + expr_set_needs_refresh(root); + + Py_RETURN_NONE; +} + +#endif /* EXPR_UPDATE_PARAMS_H */ diff --git a/sparsediffpy/_core/_compile.py b/sparsediffpy/_core/_compile.py index 09f1974..0135ffd 100644 --- a/sparsediffpy/_core/_compile.py +++ b/sparsediffpy/_core/_compile.py @@ -30,31 +30,27 @@ def compile(expr): """Compile an expression tree into a CompiledExpression. Walks the Python expression tree, discovers all Variables and Parameters, - builds C capsules bottom-up, creates a C problem, and initializes - sparsity patterns for Jacobian and Hessian computation. + builds C capsules bottom-up, and initializes sparsity patterns for + Jacobian and Hessian computation. """ - # 1. Collect all Variable and Parameter leaves + # Collect all Variable and Parameter leaves. Raise an error + # if the expression does not contain any variable. variables = [] parameters = [] _collect_leaves(expr, variables, parameters, set()) - # 2. Determine the scope - scope = None - for v in variables: - if scope is None: - scope = v._scope - elif v._scope is not scope: - raise ValueError("All variables must belong to the same Scope") + if not variables: + raise ValueError("Expression must contain at least one Variable") - if scope is None: - from sparsediffpy._core._scope import Scope - scope = Scope() + # Check that all variables in the expression have the same scope + scope = variables[0]._scope + for v in variables[1:]: + if v._scope is not scope: + raise ValueError("All variables must belong to the same Scope") n_vars = scope._next_var_offset - if n_vars == 0: - n_vars = 1 # C layer needs at least 1 variable - # 3. 
Build C capsules bottom-up + # Build C capsules bottom-up capsule_cache = {} param_capsules_ordered = [] param_objects_ordered = [] @@ -62,27 +58,16 @@ def compile(expr): expr, n_vars, capsule_cache, param_capsules_ordered, param_objects_ordered ) - # 4. Create dummy zero objective (scalar) - dummy_obj = _C.make_parameter(1, 1, -1, n_vars, np.array([0.0])) - - # 5. Create C problem with expr as the single constraint - problem = _C.make_problem(dummy_obj, [root_capsule], False) - - # 6. Register parameters if any - if param_capsules_ordered: - _C.problem_register_params(problem, param_capsules_ordered) - - # 7. Init sparsity patterns - _C.problem_init_jacobian(problem) - _C.problem_init_hessian(problem) + # Init sparsity patterns directly on the expression + _C.expr_init_jacobian(root_capsule) + _C.expr_init_hessian(root_capsule) return CompiledExpression( - problem_capsule=problem, + expr_capsule=root_capsule, scope=scope, param_capsules=param_capsules_ordered, param_objects=param_objects_ordered, expr_shape=expr.shape, - n_vars=n_vars, ) @@ -106,21 +91,18 @@ def _collect_leaves(node, variables, parameters, visited): if isinstance(node, (Constant, SparseConstant)): return - # Walk children + # Walk children. Nodes use one of three conventions: + # .child — unary ops (Neg, Sin, Exp, Reshape, ...) + # .left / .right — binary ops (Add, Multiply, MatMul, ParamScalarMult, ...) + # .matrix_expr — LeftMatMul / RightMatMul (the constant/parameter matrix) + # HStack uses .children. Some nodes combine these (e.g. LeftMatMul has both + # .child and .matrix_expr). 
if hasattr(node, "child"): _collect_leaves(node.child, variables, parameters, visited) if hasattr(node, "left"): _collect_leaves(node.left, variables, parameters, visited) if hasattr(node, "right"): _collect_leaves(node.right, variables, parameters, visited) - if hasattr(node, "x") and hasattr(node, "z"): - _collect_leaves(node.x, variables, parameters, visited) - _collect_leaves(node.z, variables, parameters, visited) - elif hasattr(node, "x") and hasattr(node, "y"): - _collect_leaves(node.x, variables, parameters, visited) - _collect_leaves(node.y, variables, parameters, visited) - if hasattr(node, "param_expr"): - _collect_leaves(node.param_expr, variables, parameters, visited) if hasattr(node, "matrix_expr"): _collect_leaves(node.matrix_expr, variables, parameters, visited) if hasattr(node, "children"): @@ -135,6 +117,8 @@ def _collect_leaves(node, variables, parameters, visited): def _build_capsule(node, n_vars, cache, param_caps, param_objs): """Recursively build C capsules for the expression tree.""" node_id = id(node) + + # catch common subexpressions if node_id in cache: return cache[node_id] @@ -156,33 +140,28 @@ def _build_capsule(node, n_vars, cache, param_caps, param_objs): def _convert_node(node, n_vars, cache, param_caps, param_objs): """Convert a single Python expression node to a C capsule.""" + d1, d2 = node.shape + # --- Leaves --- if isinstance(node, Variable): - return _C.make_variable( - node.shape[0], node.shape[1], node._var_id, n_vars - ) + return _C.make_variable(d1, d2, node._var_id, n_vars) if isinstance(node, Parameter): # Use current values if set, otherwise zeros as placeholder. # Real values are synced via problem_update_params before evaluation. 
- size = node.shape[0] * node.shape[1] + size = d1 * d2 values = node._value_flat if node._value_flat is not None else np.zeros(size) - cap = _C.make_parameter( - node.shape[0], node.shape[1], node._param_id, n_vars, values, - ) + cap = _C.make_parameter(d1, d2, node._param_id, n_vars, values) param_caps.append(cap) param_objs.append(node) return cap if isinstance(node, Constant): - return _C.make_parameter( - node.shape[0], node.shape[1], -1, n_vars, node._value_flat - ) + return _C.make_parameter(d1, d2, -1, n_vars, node._value_flat) if isinstance(node, SparseConstant): - return _C.make_parameter( - node.shape[0], node.shape[1], -1, n_vars, node._to_dense_flat() - ) + # right now we don't support sparse parameters in the C engine + return _C.make_parameter(d1, d2, -1, n_vars, node._to_dense_flat()) # --- Matmul and multiply with parameter dispatch --- # These need special handling because they access matrix_expr / param_expr @@ -194,13 +173,13 @@ def _convert_node(node, n_vars, cache, param_caps, param_objs): return _convert_right_matmul(node, n_vars, cache, param_caps, param_objs) if isinstance(node, ParamScalarMult): - param_cap = _build_capsule(node.param_expr, n_vars, cache, param_caps, param_objs) - child_cap = _build_capsule(node.child, n_vars, cache, param_caps, param_objs) + param_cap = _build_capsule(node.left, n_vars, cache, param_caps, param_objs) + child_cap = _build_capsule(node.right, n_vars, cache, param_caps, param_objs) return _C.make_param_scalar_mult(param_cap, child_cap) if isinstance(node, ParamVectorMult): - param_cap = _build_capsule(node.param_expr, n_vars, cache, param_caps, param_objs) - child_cap = _build_capsule(node.child, n_vars, cache, param_caps, param_objs) + param_cap = _build_capsule(node.left, n_vars, cache, param_caps, param_objs) + child_cap = _build_capsule(node.right, n_vars, cache, param_caps, param_objs) return _C.make_param_vector_mult(param_cap, child_cap) # --- Registry lookup --- @@ -223,13 +202,6 @@ def 
_build_children(node, n_vars, cache, param_caps, param_objs): caps.append(_build_capsule(node.left, n_vars, cache, param_caps, param_objs)) if hasattr(node, "right"): caps.append(_build_capsule(node.right, n_vars, cache, param_caps, param_objs)) - # QuadOverLin/RelEntr: .x, .y or .x, .z - if hasattr(node, "x") and not caps: - caps.append(_build_capsule(node.x, n_vars, cache, param_caps, param_objs)) - if hasattr(node, "z"): - caps.append(_build_capsule(node.z, n_vars, cache, param_caps, param_objs)) - elif hasattr(node, "y"): - caps.append(_build_capsule(node.y, n_vars, cache, param_caps, param_objs)) # HStack: .children if hasattr(node, "children"): for c in node.children: @@ -239,6 +211,8 @@ def _build_children(node, n_vars, cache, param_caps, param_objs): # --------------------------------------------------------------------------- # Left/right matmul converters +# These live here rather than in _registry.py because the Parameter case +# needs _build_capsule, which would create a circular dependency. 
# --------------------------------------------------------------------------- def _convert_left_matmul(node, n_vars, cache, param_caps, param_objs): @@ -252,14 +226,12 @@ def _convert_left_matmul(node, n_vars, cache, param_caps, param_objs): if isinstance(matrix, Parameter): param_cap = _build_capsule(matrix, n_vars, cache, param_caps, param_objs) - return make_dense_left_matmul( - param_cap, child_cap, _to_dense_row_major(matrix), m, n - ) + vals = _to_dense_row_major(matrix) + return make_dense_left_matmul(param_cap, child_cap, vals, m, n) if isinstance(matrix, Constant): - return make_dense_left_matmul( - None, child_cap, _to_dense_row_major(matrix), m, n - ) + vals = _to_dense_row_major(matrix) + return make_dense_left_matmul(None, child_cap, vals, m, n) raise TypeError(f"LeftMatMul matrix must be Constant, SparseConstant, or Parameter") @@ -275,14 +247,12 @@ def _convert_right_matmul(node, n_vars, cache, param_caps, param_objs): if isinstance(matrix, Parameter): param_cap = _build_capsule(matrix, n_vars, cache, param_caps, param_objs) - return make_dense_right_matmul( - param_cap, child_cap, _to_dense_row_major(matrix), m, n - ) + vals = _to_dense_row_major(matrix) + return make_dense_right_matmul(param_cap, child_cap, vals, m, n) if isinstance(matrix, Constant): - return make_dense_right_matmul( - None, child_cap, _to_dense_row_major(matrix), m, n - ) + vals = _to_dense_row_major(matrix) + return make_dense_right_matmul(None, child_cap, vals, m, n) raise TypeError(f"RightMatMul matrix must be Constant, SparseConstant, or Parameter") @@ -298,17 +268,16 @@ class CompiledExpression: Reads parameter values from the Parameter objects. 
""" - def __init__(self, problem_capsule, scope, param_capsules, param_objects, - expr_shape, n_vars): - self._problem = problem_capsule + def __init__(self, expr_capsule, scope, param_capsules, param_objects, + expr_shape): + self._expr = expr_capsule self._scope = scope self._param_capsules = param_capsules self._param_objects = param_objects self._expr_shape = expr_shape - self._n_vars = n_vars def _sync_params(self): - """Push current parameter values to the C problem.""" + """Push current parameter values to the C expression.""" if not self._param_objects: return for p in self._param_objects: @@ -319,19 +288,17 @@ def _sync_params(self): ) theta_parts = [p._value_flat for p in self._param_objects] theta = np.concatenate(theta_parts) - _C.problem_update_params(self._problem, theta) + _C.expr_update_params(self._expr, self._param_capsules, theta) def _set_point(self): """Push variable values and evaluate forward pass.""" self._sync_params() - _C.problem_objective_forward(self._problem, self._scope._flat_values) - _C.problem_constraint_forward(self._problem, self._scope._flat_values) + _C.expr_forward(self._expr, self._scope._flat_values) def forward(self): """Evaluate the expression at the current variable values.""" - self._set_point() - result = _C.problem_constraint_forward(self._problem, self._scope._flat_values) - return result + self._sync_params() + return _C.expr_forward(self._expr, self._scope._flat_values) def jacobian(self): """Compute the sparse Jacobian at the current variable values. @@ -339,7 +306,7 @@ def jacobian(self): Returns scipy.sparse.csr_matrix of shape (expr_size, n_vars). 
""" self._set_point() - data, indices, indptr, (m, n) = _C.problem_jacobian(self._problem) + data, indices, indptr, (m, n) = _C.expr_jacobian(self._expr) return scipy.sparse.csr_matrix((data, indices, indptr), shape=(m, n)) def hessian(self, weights): @@ -355,8 +322,6 @@ def hessian(self, weights): """ weights = np.asarray(weights, dtype=np.float64).ravel() self._set_point() - _C.problem_jacobian(self._problem) - data, indices, indptr, (m, n) = _C.problem_hessian( - self._problem, 0.0, weights - ) + _C.expr_jacobian(self._expr) + data, indices, indptr, (m, n) = _C.expr_hessian(self._expr, weights) return scipy.sparse.csr_matrix((data, indices, indptr), shape=(m, n)) diff --git a/sparsediffpy/_core/_nodes_affine.py b/sparsediffpy/_core/_nodes_affine.py index d99eeb0..f2e2490 100644 --- a/sparsediffpy/_core/_nodes_affine.py +++ b/sparsediffpy/_core/_nodes_affine.py @@ -106,18 +106,18 @@ def __init__(self, child, flat_indices, result_shape): class ParamScalarMult(Expression): """a * f(x) where a is a scalar constant/parameter.""" - def __init__(self, param_expr, child): - self.param_expr = param_expr - self.child = child - self.shape = child.shape + def __init__(self, left, right): + self.left = left + self.right = right + self.shape = right.shape class ParamVectorMult(Expression): """a . f(x) elementwise where a is a constant/parameter of matching shape.""" - def __init__(self, param_expr, child): - self.param_expr = param_expr - self.child = child - self.shape = child.shape + def __init__(self, left, right): + self.left = left + self.right = right + self.shape = right.shape class LeftMatMul(Expression): diff --git a/sparsediffpy/_core/_nodes_bivariate.py b/sparsediffpy/_core/_nodes_bivariate.py index 7b38837..4366a74 100644 --- a/sparsediffpy/_core/_nodes_bivariate.py +++ b/sparsediffpy/_core/_nodes_bivariate.py @@ -56,8 +56,8 @@ def __init__(self, x, z): raise ValueError( "quad_over_lin: z must not appear in x." 
) - self.x = x - self.z = z + self.left = x + self.right = z self.shape = (1, 1) @@ -81,5 +81,5 @@ def __init__(self, x, y): f"rel_entr: shapes must match or one must be scalar, " f"got {x.shape} and {y.shape}" ) - self.x = x - self.y = y + self.left = x + self.right = y diff --git a/tests/test_misc.py b/tests/test_misc.py index 0e87139..ccacf63 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -3,6 +3,7 @@ compile twice, degenerate cases.""" import numpy as np +import pytest import sparsediffpy as sp from tests.utils import NumericalDerivativeChecker, random_point, random_positive_point @@ -335,12 +336,12 @@ def test_identity_expression(self, scope, rng): J = fn.jacobian().toarray() np.testing.assert_allclose(J, np.eye(3)) - def test_constant_expression(self, scope, rng): - """Compiling a constant (no variables).""" + def test_constant_expression_raises(self, scope, rng): + """Compiling a constant (no variables) should raise.""" from sparsediffpy._core._constants import Constant c = Constant(np.array([1.0, 2.0, 3.0]), (3, 1)) - fn = sp.compile(c) - np.testing.assert_allclose(fn.forward(), [1.0, 2.0, 3.0]) + with pytest.raises(ValueError, match="at least one Variable"): + sp.compile(c) def test_nested_transpose(self, scope, rng): """x.T.T should be x.""" From 1ad1ed8fd7acc0e435cd962fbe0068a1cfaabb58 Mon Sep 17 00:00:00 2001 From: dance858 Date: Wed, 15 Apr 2026 09:35:36 -0700 Subject: [PATCH 13/17] add corner case test --- sparsediffpy/_core/_compile.py | 36 ++++++++++++++++++++++------------ sparsediffpy/_core/_scope.py | 2 ++ tests/test_misc.py | 26 ++++++++++++++++++++++-- tests/utils.py | 5 +++++ 4 files changed, 54 insertions(+), 15 deletions(-) diff --git a/sparsediffpy/_core/_compile.py b/sparsediffpy/_core/_compile.py index 0135ffd..0a5d398 100644 --- a/sparsediffpy/_core/_compile.py +++ b/sparsediffpy/_core/_compile.py @@ -58,9 +58,8 @@ def compile(expr): expr, n_vars, capsule_cache, param_capsules_ordered, param_objects_ordered ) - # Init 
sparsity patterns directly on the expression - _C.expr_init_jacobian(root_capsule) - _C.expr_init_hessian(root_capsule) + if param_capsules_ordered: + scope._params_dirty = True return CompiledExpression( expr_capsule=root_capsule, @@ -275,10 +274,12 @@ def __init__(self, expr_capsule, scope, param_capsules, param_objects, self._param_capsules = param_capsules self._param_objects = param_objects self._expr_shape = expr_shape + self._jacobian_initialized = False + self._hessian_initialized = False def _sync_params(self): - """Push current parameter values to the C expression.""" - if not self._param_objects: + """Push current parameter values to the C expression if any changed.""" + if not self._scope._params_dirty: return for p in self._param_objects: if p._value_flat is None: @@ -289,29 +290,37 @@ def _sync_params(self): theta_parts = [p._value_flat for p in self._param_objects] theta = np.concatenate(theta_parts) _C.expr_update_params(self._expr, self._param_capsules, theta) + self._scope._params_dirty = False - def _set_point(self): - """Push variable values and evaluate forward pass.""" - self._sync_params() - _C.expr_forward(self._expr, self._scope._flat_values) + def _ensure_jacobian_initialized(self): + if not self._jacobian_initialized: + _C.expr_init_jacobian(self._expr) + self._jacobian_initialized = True def forward(self): - """Evaluate the expression at the current variable values.""" + """Evaluate the expression at the current variable values. + + Must be called before jacobian() or hessian(). + """ + self._ensure_jacobian_initialized() self._sync_params() return _C.expr_forward(self._expr, self._scope._flat_values) def jacobian(self): """Compute the sparse Jacobian at the current variable values. + Requires forward() to have been called first. + Returns scipy.sparse.csr_matrix of shape (expr_size, n_vars). 
""" - self._set_point() data, indices, indptr, (m, n) = _C.expr_jacobian(self._expr) return scipy.sparse.csr_matrix((data, indices, indptr), shape=(m, n)) def hessian(self, weights): """Compute the sparse Hessian of the weighted expression. + Requires forward() and jacobian() to have been called first. + The Hessian is of the scalar function w^T f(x), where w is the weights vector and f is the compiled expression. @@ -320,8 +329,9 @@ def hessian(self, weights): Returns scipy.sparse.csr_matrix of shape (n_vars, n_vars). """ + if not self._hessian_initialized: + _C.expr_init_hessian(self._expr) + self._hessian_initialized = True weights = np.asarray(weights, dtype=np.float64).ravel() - self._set_point() - _C.expr_jacobian(self._expr) data, indices, indptr, (m, n) = _C.expr_hessian(self._expr, weights) return scipy.sparse.csr_matrix((data, indices, indptr), shape=(m, n)) diff --git a/sparsediffpy/_core/_scope.py b/sparsediffpy/_core/_scope.py index c64be8b..a63a626 100644 --- a/sparsediffpy/_core/_scope.py +++ b/sparsediffpy/_core/_scope.py @@ -60,6 +60,7 @@ def value(self, val): if val.size != self.size: raise DimensionError(f"expected {self.size} elements, got {val.size}") self._value_flat = val.copy() + self._scope._params_dirty = True class Scope: @@ -71,6 +72,7 @@ def __init__(self): self._flat_values = np.zeros(0, dtype=np.float64) self._next_var_offset = 0 self._next_param_offset = 0 + self._params_dirty = False def Variable(self, d1, d2): """Create a new variable in this scope.""" diff --git a/tests/test_misc.py b/tests/test_misc.py index ccacf63..d287cde 100644 --- a/tests/test_misc.py +++ b/tests/test_misc.py @@ -18,6 +18,8 @@ def test_neg_hessian_is_zero(self, scope, rng): f = -x fn = sp.compile(f) x0 = random_point(scope, rng) + fn.forward() + fn.jacobian() H = fn.hessian(rng.standard_normal(3)) np.testing.assert_allclose(H.toarray(), np.zeros((3, 3)), atol=1e-14) @@ -27,6 +29,8 @@ def test_add_hessian_is_zero(self, scope, rng): f = x + y fn = 
sp.compile(f) x0 = random_point(scope, rng) + fn.forward() + fn.jacobian() H = fn.hessian(rng.standard_normal(3)) np.testing.assert_allclose(H.toarray(), np.zeros((6, 6)), atol=1e-14) @@ -36,6 +40,8 @@ def test_hstack_hessian_is_zero(self, scope, rng): f = sp.hstack([x, y]) fn = sp.compile(f) x0 = random_point(scope, rng) + fn.forward() + fn.jacobian() H = fn.hessian(rng.standard_normal(6)) np.testing.assert_allclose(H.toarray(), np.zeros((6, 6)), atol=1e-14) @@ -44,6 +50,8 @@ def test_index_hessian_is_zero(self, scope, rng): f = x[1:3] fn = sp.compile(f) x0 = random_point(scope, rng) + fn.forward() + fn.jacobian() H = fn.hessian(rng.standard_normal(2)) np.testing.assert_allclose(H.toarray(), np.zeros((4, 4)), atol=1e-14) @@ -52,6 +60,8 @@ def test_sum_hessian_is_zero(self, scope, rng): f = sp.sum(x) fn = sp.compile(f) x0 = random_point(scope, rng) + fn.forward() + fn.jacobian() H = fn.hessian(np.array([1.0])) np.testing.assert_allclose(H.toarray(), np.zeros((6, 6)), atol=1e-14) @@ -87,10 +97,12 @@ def test_jacobian_updates_with_new_values(self, scope, rng): fn = sp.compile(f) x.value = np.array([0.0, 0.0, 0.0]) + fn.forward() J1 = fn.jacobian().toarray() np.testing.assert_allclose(np.diag(J1), np.cos([0, 0, 0])) x.value = np.array([1.0, 2.0, 3.0]) + fn.forward() J2 = fn.jacobian().toarray() np.testing.assert_allclose(np.diag(J2), np.cos([1, 2, 3])) @@ -101,10 +113,14 @@ def test_hessian_updates_with_new_values(self, scope, rng): w = np.ones(3) x.value = np.array([0.0, 0.0, 0.0]) + fn.forward() + fn.jacobian() H1 = fn.hessian(w).toarray() np.testing.assert_allclose(np.diag(H1), -np.sin([0, 0, 0]), atol=1e-14) x.value = np.array([1.0, 2.0, 3.0]) + fn.forward() + fn.jacobian() H2 = fn.hessian(w).toarray() np.testing.assert_allclose(np.diag(H2), -np.sin([1, 2, 3])) @@ -156,10 +172,12 @@ def test_left_matmul_jacobian_after_update(self, scope, rng): fn = sp.compile(f) x0 = random_point(scope, rng) + fn.forward() J1 = fn.jacobian().toarray() 
np.testing.assert_allclose(J1, np.eye(3), atol=1e-14) A.value = 2 * np.eye(3) + fn.forward() J2 = fn.jacobian().toarray() np.testing.assert_allclose(J2, 2 * np.eye(3), atol=1e-14) @@ -171,10 +189,12 @@ def test_scalar_mult_jacobian_after_update(self, scope, rng): fn = sp.compile(f) x0 = random_point(scope, rng) + fn.forward() J1 = fn.jacobian().toarray() np.testing.assert_allclose(J1, 3.0 * np.eye(3), atol=1e-14) a.value = np.array([[7.0]]) + fn.forward() J2 = fn.jacobian().toarray() np.testing.assert_allclose(J2, 7.0 * np.eye(3), atol=1e-14) @@ -286,7 +306,9 @@ def test_two_compiles_independent(self, scope, rng): fn2 = sp.compile(f) x.value = np.array([1.0, 2.0, 3.0]) - np.testing.assert_allclose(fn1.forward(), fn2.forward()) + f1 = fn1.forward() + f2 = fn2.forward() + np.testing.assert_allclose(f1, f2) np.testing.assert_allclose( fn1.jacobian().toarray(), fn2.jacobian().toarray() ) @@ -333,7 +355,7 @@ def test_identity_expression(self, scope, rng): fn = sp.compile(x) x0 = random_point(scope, rng) np.testing.assert_allclose(fn.forward(), x0) - J = fn.jacobian().toarray() + J = fn.jacobian().toarray() # forward() was just called above np.testing.assert_allclose(J, np.eye(3)) def test_constant_expression_raises(self, scope, rng): diff --git a/tests/utils.py b/tests/utils.py index efb56fb..d6cc85a 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -31,6 +31,7 @@ def check_jacobian(self, x0): self._scope.set_values(x0) # Analytical Jacobian + self._fn.forward() J_analytical = self._fn.jacobian().toarray() m = J_analytical.shape[0] @@ -73,6 +74,8 @@ def check_hessian(self, x0, weights): # Analytical Hessian self._scope.set_values(x0) + self._fn.forward() + self._fn.jacobian() H_analytical = self._fn.hessian(weights).toarray() # Numerical Hessian via central differences on the gradient @@ -84,10 +87,12 @@ def check_hessian(self, x0, weights): x_minus[j] -= self._h self._scope.set_values(x_plus) + self._fn.forward() J_plus = self._fn.jacobian().toarray() grad_plus = 
J_plus.T @ weights self._scope.set_values(x_minus) + self._fn.forward() J_minus = self._fn.jacobian().toarray() grad_minus = J_minus.T @ weights From 89867a3ed614d99c30ccecb8d0e1cc80d7b1ed9c Mon Sep 17 00:00:00 2001 From: dance858 Date: Wed, 15 Apr 2026 09:45:08 -0700 Subject: [PATCH 14/17] add test for matrix ordering of weights for hessian --- sparsediffpy/_core/_compile.py | 6 ++++- tests/complicated/test_compositions.py | 37 ++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/sparsediffpy/_core/_compile.py b/sparsediffpy/_core/_compile.py index 0a5d398..918cf7d 100644 --- a/sparsediffpy/_core/_compile.py +++ b/sparsediffpy/_core/_compile.py @@ -302,6 +302,10 @@ def forward(self): Must be called before jacobian() or hessian(). """ + + # TODO: we want to refactor the c engine so the forward does not + # depend on jacobian being initialized. I think that's only for one or + # two atoms, if I recall correctly. Then we can remove this call. self._ensure_jacobian_initialized() self._sync_params() return _C.expr_forward(self._expr, self._scope._flat_values) @@ -332,6 +336,6 @@ def hessian(self, weights): if not self._hessian_initialized: _C.expr_init_hessian(self._expr) self._hessian_initialized = True - weights = np.asarray(weights, dtype=np.float64).ravel() + weights = np.asarray(weights, dtype=np.float64).ravel(order='F') data, indices, indptr, (m, n) = _C.expr_hessian(self._expr, weights) return scipy.sparse.csr_matrix((data, indices, indptr), shape=(m, n)) diff --git a/tests/complicated/test_compositions.py b/tests/complicated/test_compositions.py index 666dc3e..a6bfb39 100644 --- a/tests/complicated/test_compositions.py +++ b/tests/complicated/test_compositions.py @@ -310,3 +310,40 @@ def test_hessian(self, scope, rng): checker = NumericalDerivativeChecker(fn, scope) x0 = random_point(scope, rng, low=-0.5, high=0.5) checker.check_hessian(x0, rng.standard_normal(3)) + + +# 
----------------------------------------------------------------------- +# 9. Matrix hessian: sin(A @ X) with matrix variable and 2D weights +# ----------------------------------------------------------------------- + +class TestMatrixHessian: + def test_sin_AX_hessian(self, scope, rng): + X = scope.Variable(3, 3) + A = rng.standard_normal((3, 3)) + f = sp.sin(A @ X) + fn = sp.compile(f) + checker = NumericalDerivativeChecker(fn, scope) + x0 = random_point(scope, rng, low=-0.5, high=0.5) + weights = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float64) + checker.check_hessian(x0, weights.ravel(order='F')) + + def test_sin_AX_hessian_2d_weights(self, scope, rng): + """Passing weights as a 2D array — hessian() should flatten column-major.""" + X = scope.Variable(3, 3) + A = rng.standard_normal((3, 3)) + f = sp.sin(A @ X) + fn = sp.compile(f) + x0 = random_point(scope, rng, low=-0.5, high=0.5) + weights = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float64) + + # 2D weights should be flattened column-major internally + fn.forward() + fn.jacobian() + H_2d = fn.hessian(weights) + + # Compare against explicitly flattened F-order weights + fn.forward() + fn.jacobian() + H_flat = fn.hessian(weights.ravel(order='F')) + + np.testing.assert_allclose(H_2d.toarray(), H_flat.toarray()) From 68c932c9cde01c0d69d43dbc210183662c15c2ee Mon Sep 17 00:00:00 2001 From: dance858 Date: Wed, 15 Apr 2026 09:58:54 -0700 Subject: [PATCH 15/17] remove jacobian initialization due to change in C engine --- sparsediffpy/_core/_compile.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/sparsediffpy/_core/_compile.py b/sparsediffpy/_core/_compile.py index 918cf7d..dca986a 100644 --- a/sparsediffpy/_core/_compile.py +++ b/sparsediffpy/_core/_compile.py @@ -302,11 +302,6 @@ def forward(self): Must be called before jacobian() or hessian(). """ - - # TODO: we want to refactor the c engine so the forward does not - # depend on jacobian being initialized. 
I think that's only for one or - # two atoms, if I recall correctly. Then we can remove this call. - self._ensure_jacobian_initialized() self._sync_params() return _C.expr_forward(self._expr, self._scope._flat_values) @@ -317,6 +312,9 @@ def jacobian(self): Returns scipy.sparse.csr_matrix of shape (expr_size, n_vars). """ + if not self._jacobian_initialized: + _C.expr_init_jacobian(self._expr) + self._jacobian_initialized = True data, indices, indptr, (m, n) = _C.expr_jacobian(self._expr) return scipy.sparse.csr_matrix((data, indices, indptr), shape=(m, n)) From 87236d7c7bb0460d8259a1346a8d9d1350062c25 Mon Sep 17 00:00:00 2001 From: dance858 Date: Sat, 18 Apr 2026 18:02:26 -0700 Subject: [PATCH 16/17] problem class --- sparsediffpy/__init__.py | 3 + sparsediffpy/_core/_problem.py | 194 +++++++++++++++++++++++ tests/problem/__init__.py | 0 tests/problem/test_problem.py | 276 +++++++++++++++++++++++++++++++++ 4 files changed, 473 insertions(+) create mode 100644 sparsediffpy/_core/_problem.py create mode 100644 tests/problem/__init__.py create mode 100644 tests/problem/test_problem.py diff --git a/sparsediffpy/__init__.py b/sparsediffpy/__init__.py index 29480ac..6a4b0be 100644 --- a/sparsediffpy/__init__.py +++ b/sparsediffpy/__init__.py @@ -24,6 +24,9 @@ # Compile from sparsediffpy._core._compile import compile # noqa: F401 +# Problem +from sparsediffpy._core._problem import Problem # noqa: F401 + # Elementwise functions from sparsediffpy._core._fn_elementwise import ( # noqa: F401 sin, cos, exp, log, tan, sinh, tanh, asinh, atanh, diff --git a/sparsediffpy/_core/_problem.py b/sparsediffpy/_core/_problem.py new file mode 100644 index 0000000..9122fb2 --- /dev/null +++ b/sparsediffpy/_core/_problem.py @@ -0,0 +1,194 @@ +"""Problem class: wraps a C problem capsule (objective + list of constraints). + +Takes SparseDiffPy expressions (not CVXPY, not pre-built capsules). CVXPY-facing +adapters live in downstream libraries (e.g. DNLP). 
+""" + +import numpy as np + +from sparsediffpy import _sparsediffengine as _C +from sparsediffpy._core._compile import _build_capsule, _collect_leaves + + +class Problem: + """A compiled NLP-style problem: one scalar objective plus a list of constraints. + + Method names mirror DNLP's `C_problem` so a CVXPY adapter can return a + Problem and existing solver callsites keep working. + """ + + def __init__(self, objective, constraints=None, verbose=False): + constraints = list(constraints) if constraints else [] + + if objective.shape != (1, 1): + raise ValueError( + f"Objective must be scalar (shape (1, 1)), got {objective.shape}" + ) + + variables, parameters = [], [] + visited = set() + _collect_leaves(objective, variables, parameters, visited) + for c in constraints: + _collect_leaves(c, variables, parameters, visited) + + if not variables: + raise ValueError("Problem must contain at least one Variable") + + scope = variables[0]._scope + for v in variables[1:]: + if v._scope is not scope: + raise ValueError("All variables must belong to the same Scope") + + n_vars = scope._next_var_offset + + # One shared cache across objective + all constraints: CSE in both + # directions (within an expression, and across the obj/constraint + # boundary) is safe, and each Parameter capsule is appended to + # param_caps exactly once. 
+ cache = {} + param_caps, param_objs = [], [] + obj_cap = _build_capsule(objective, n_vars, cache, param_caps, param_objs) + constraint_caps = [ + _build_capsule(c, n_vars, cache, param_caps, param_objs) + for c in constraints + ] + + self._capsule = _C.make_problem(obj_cap, constraint_caps, verbose) + if param_caps: + _C.problem_register_params(self._capsule, param_caps) + + self._scope = scope + self._param_capsules = param_caps + self._param_objects = param_objs + self._n_vars = n_vars + self._total_constraint_size = sum(c.size for c in constraints) + self._jacobian_coo_initialized = False + self._hessian_coo_initialized = False + + if param_caps: + self._sync_params() + + # ------------------------------------------------------------------ + # Internal + # ------------------------------------------------------------------ + + def _sync_params(self): + """Push current Parameter values to the C problem. + + Called once at construction. After construction, callers invoke + update_params(theta) explicitly (matching DNLP's solver-loop contract). + """ + for p in self._param_objects: + if p._value_flat is None: + raise ValueError( + f"Parameter with shape {p.shape} has no value set. " + f"Assign a value via parameter.value = ... before constructing Problem." + ) + theta = np.concatenate([p._value_flat for p in self._param_objects]) + _C.problem_update_params(self._capsule, theta) + self._scope._params_dirty = False + + # ------------------------------------------------------------------ + # Parameter updates + # ------------------------------------------------------------------ + + def update_params(self, theta): + """Update parameter values in the C DAG from a flat theta vector. + + Sparsity structures (Jacobian/Hessian) remain valid after this call. 
+ """ + theta = np.asarray(theta, dtype=np.float64) + _C.problem_update_params(self._capsule, theta) + self._scope._params_dirty = False + + # ------------------------------------------------------------------ + # Sparsity initialization (COO) + # ------------------------------------------------------------------ + + def init_jacobian_coo(self): + """Fill sparsity for the constraint Jacobian in COO format. + + Must be called once before get_jacobian_sparsity_coo() or eval_jacobian_vals(). + """ + _C.problem_init_jacobian_coo(self._capsule) + self._jacobian_coo_initialized = True + + def init_hessian_coo_lower_tri(self): + """Fill sparsity for the Lagrangian Hessian (lower triangle, COO). + + Must be called once before get_problem_hessian_sparsity_coo() or + eval_hessian_vals_coo_lower_tri(). + """ + _C.problem_init_hessian_coo_lower_triangular(self._capsule) + self._hessian_coo_initialized = True + + # ------------------------------------------------------------------ + # Forward evaluation + # ------------------------------------------------------------------ + + def objective_forward(self, u): + """Evaluate the objective at variable values `u`. Returns a float.""" + u = np.asarray(u, dtype=np.float64) + return _C.problem_objective_forward(self._capsule, u) + + def constraint_forward(self, u): + """Evaluate constraints at variable values `u`. Returns an np.ndarray.""" + u = np.asarray(u, dtype=np.float64) + return _C.problem_constraint_forward(self._capsule, u) + + def gradient(self): + """Compute gradient of the objective. Call objective_forward first.""" + return _C.problem_gradient(self._capsule) + + # ------------------------------------------------------------------ + # Jacobian (COO path) + # ------------------------------------------------------------------ + + def get_jacobian_sparsity_coo(self): + """Return the sparsity pattern (rows, cols) of the constraint Jacobian. + + Call init_jacobian_coo() first. 
+ """ + rows, cols, _shape = _C.get_jacobian_sparsity_coo(self._capsule) + return rows, cols + + def eval_jacobian_vals(self): + """Evaluate the constraint Jacobian and return its nonzero values. + + Values correspond to the sparsity pattern from get_jacobian_sparsity_coo(). + Call constraint_forward() first to set the evaluation point. + """ + return _C.problem_eval_jacobian_vals(self._capsule) + + # ------------------------------------------------------------------ + # Lagrangian Hessian (COO lower-triangular path) + # ------------------------------------------------------------------ + + def get_problem_hessian_sparsity_coo(self): + """Return the sparsity pattern (rows, cols) of the lower-triangular + Lagrangian Hessian. + + Call init_hessian_coo_lower_tri() first. + """ + rows, cols, _shape = _C.get_problem_hessian_sparsity_coo(self._capsule) + return rows, cols + + def eval_hessian_vals_coo_lower_tri(self, obj_factor, lagrange): + """Evaluate the lower-triangular Lagrangian Hessian values. + + Computes obj_factor * H_f + sum_i lagrange[i] * H_gi, where f is the + objective and g_i are the constraints. Values correspond to the sparsity + pattern from get_problem_hessian_sparsity_coo(). + + Call objective_forward() and constraint_forward() first to set the + evaluation point. 
+ """ + lagrange = np.asarray(lagrange, dtype=np.float64) + if lagrange.size != self._total_constraint_size: + raise ValueError( + f"lagrange length {lagrange.size} != total_constraint_size " + f"{self._total_constraint_size}" + ) + return _C.problem_eval_hessian_vals_coo( + self._capsule, float(obj_factor), lagrange + ) diff --git a/tests/problem/__init__.py b/tests/problem/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/problem/test_problem.py b/tests/problem/test_problem.py new file mode 100644 index 0000000..2279c79 --- /dev/null +++ b/tests/problem/test_problem.py @@ -0,0 +1,276 @@ +"""Tests for sp.Problem: objective + list of constraints, derivatives via COO.""" + +import numpy as np +import pytest +import scipy.sparse as sparse + +import sparsediffpy as sp + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _assemble_jacobian(problem, u, m, n): + """Evaluate the problem and assemble its COO Jacobian into a dense (m, n).""" + problem.objective_forward(u) + problem.constraint_forward(u) + rows, cols = problem.get_jacobian_sparsity_coo() + vals = problem.eval_jacobian_vals() + if m == 0: + return np.zeros((0, n)) + return sparse.coo_matrix((vals, (rows, cols)), shape=(m, n)).toarray() + + +def _assemble_hessian(problem, u, obj_factor, lagrange, n): + """Evaluate the problem and assemble its lower-triangular COO Hessian into + a dense symmetric (n, n).""" + problem.objective_forward(u) + problem.constraint_forward(u) + problem.gradient() # required before hessian (populates obj adjoints) + problem.eval_jacobian_vals() # required before hessian (populates constraint adjoints) + rows, cols = problem.get_problem_hessian_sparsity_coo() + vals = problem.eval_hessian_vals_coo_lower_tri(obj_factor, lagrange) + H_lower = sparse.coo_matrix((vals, (rows, cols)), shape=(n, n)).toarray() + return H_lower + H_lower.T - 
np.diag(np.diag(H_lower)) + + +def _numerical_gradient(problem, u, h=1e-6): + n = u.size + g = np.zeros(n) + for j in range(n): + u_p = u.copy(); u_p[j] += h + u_m = u.copy(); u_m[j] -= h + f_p = problem.objective_forward(u_p) + f_m = problem.objective_forward(u_m) + g[j] = (f_p - f_m) / (2 * h) + return g + + +def _numerical_jacobian(problem, u, m, h=1e-6): + n = u.size + J = np.zeros((m, n)) + for j in range(n): + u_p = u.copy(); u_p[j] += h + u_m = u.copy(); u_m[j] -= h + c_p = problem.constraint_forward(u_p) + c_m = problem.constraint_forward(u_m) + J[:, j] = (c_p - c_m) / (2 * h) + return J + + +def _numerical_lagrangian_hessian(problem, u, obj_factor, lagrange, m, h=1e-5): + """Central differences on the Lagrangian gradient grad_L = obj_factor*grad_f + J^T lambda.""" + n = u.size + + def lag_grad(u_): + problem.objective_forward(u_) + problem.constraint_forward(u_) + gf = problem.gradient() + if m > 0: + rows, cols = problem.get_jacobian_sparsity_coo() + vals = problem.eval_jacobian_vals() + J = sparse.coo_matrix((vals, (rows, cols)), shape=(m, n)).toarray() + return obj_factor * gf + J.T @ lagrange + return obj_factor * gf + + H = np.zeros((n, n)) + for j in range(n): + u_p = u.copy(); u_p[j] += h + u_m = u.copy(); u_m[j] -= h + H[:, j] = (lag_grad(u_p) - lag_grad(u_m)) / (2 * h) + return (H + H.T) / 2 + + +# --------------------------------------------------------------------------- +# Objective only, no constraints +# --------------------------------------------------------------------------- + +def test_problem_objective_only_objective_forward(scope, rng): + x = scope.Variable(3, 1) + obj = sp.sum(sp.power(x, 2)) + problem = sp.Problem(obj, []) + u = rng.standard_normal(3) + np.testing.assert_allclose(problem.objective_forward(u), float(np.sum(u ** 2))) + + +def test_problem_objective_only_gradient(scope, rng): + x = scope.Variable(4, 1) + obj = sp.sum(sp.power(x, 2)) + sp.sum(sp.sin(x)) + problem = sp.Problem(obj, []) + problem.init_jacobian_coo() + 
problem.init_hessian_coo_lower_tri() + u = rng.uniform(-0.5, 0.5, size=4) + problem.objective_forward(u) + np.testing.assert_allclose(problem.gradient(), + _numerical_gradient(problem, u), rtol=1e-5, atol=1e-6) + + +def test_problem_objective_only_hessian(scope, rng): + x = scope.Variable(3, 1) + obj = sp.sum(sp.power(x, 2)) + sp.sum(sp.sin(x)) + problem = sp.Problem(obj, []) + problem.init_jacobian_coo() + problem.init_hessian_coo_lower_tri() + + u = rng.uniform(-0.5, 0.5, size=3) + H = _assemble_hessian(problem, u, obj_factor=1.0, lagrange=np.zeros(0), n=3) + H_num = _numerical_lagrangian_hessian(problem, u, 1.0, np.zeros(0), m=0) + np.testing.assert_allclose(H, H_num, rtol=1e-4, atol=1e-6) + + +def test_problem_zero_constraint_size(scope): + x = scope.Variable(2, 1) + obj = sp.sum(sp.power(x, 2)) + problem = sp.Problem(obj, []) + assert problem._total_constraint_size == 0 + u = np.array([1.0, 2.0]) + c = problem.constraint_forward(u) + assert c.shape == (0,) + + +# --------------------------------------------------------------------------- +# Objective + single vector constraint +# --------------------------------------------------------------------------- + +def test_problem_constraint_forward_and_jacobian(scope, rng): + x = scope.Variable(3, 1) + A = np.array([[1.0, 2.0, 3.0], [-1.0, 0.5, 2.0]]) + b = np.array([[0.1], [-0.2]]) + obj = sp.sum(sp.exp(x)) + c = A @ x + b + sp.sin(x[:2]) + problem = sp.Problem(obj, [c]) + problem.init_jacobian_coo() + + u = rng.uniform(-0.3, 0.3, size=3) + + # constraint_forward value + c_val = problem.constraint_forward(u) + expected = A @ u + b.ravel() + np.sin(u[:2]) + np.testing.assert_allclose(c_val, expected, rtol=1e-10) + + # Jacobian (assembled from COO) vs numerical + J_analytic = _assemble_jacobian(problem, u, m=2, n=3) + J_numeric = _numerical_jacobian(problem, u, m=2) + np.testing.assert_allclose(J_analytic, J_numeric, rtol=1e-5, atol=1e-6) + + +def test_problem_lagrangian_hessian(scope, rng): + x = scope.Variable(3, 
 1)
+ obj = sp.sum(sp.power(x, 2)) + sp.sum(sp.sin(x))
+ c1 = sp.exp(x) + x # (3, 1)
+ c2 = sp.sum(sp.power(x, 3)) # (1, 1)
+ problem = sp.Problem(obj, [c1, c2])
+ problem.init_jacobian_coo()
+ problem.init_hessian_coo_lower_tri()
+
+ u = rng.uniform(-0.3, 0.3, size=3)
+ lagrange = rng.standard_normal(4) # 3 + 1 = 4 constraint rows
+ obj_factor = 0.7
+
+ H = _assemble_hessian(problem, u, obj_factor, lagrange, n=3)
+ H_num = _numerical_lagrangian_hessian(problem, u, obj_factor, lagrange, m=4)
+ np.testing.assert_allclose(H, H_num, rtol=1e-4, atol=1e-6)
+
+
+# ---------------------------------------------------------------------------
+# Shared subexpression across obj + constraint
+# ---------------------------------------------------------------------------
+
+def test_problem_shared_subexpression(scope, rng):
+ """`t = sp.sin(x)` reused in both obj and constraint. The Python node is
+ shared, and Problem compiles all roots through one shared compile cache, so
+ the CSE'd capsule for `t` is reused across the obj/constraint boundary; this
+ test checks that values and derivatives stay correct under that sharing."""
+ x = scope.Variable(3, 1)
+ t = sp.sin(x) # shared Python node
+ obj = sp.sum(sp.power(t, 2))
+ c = t + x # also uses t
+ problem = sp.Problem(obj, [c])
+ problem.init_jacobian_coo()
+ problem.init_hessian_coo_lower_tri()
+
+ u = rng.uniform(-0.5, 0.5, size=3)
+
+ np.testing.assert_allclose(problem.objective_forward(u),
+ float(np.sum(np.sin(u) ** 2)), rtol=1e-10)
+ np.testing.assert_allclose(problem.constraint_forward(u),
+ np.sin(u) + u, rtol=1e-10)
+
+ J_analytic = _assemble_jacobian(problem, u, m=3, n=3)
+ np.testing.assert_allclose(J_analytic, _numerical_jacobian(problem, u, m=3),
+ rtol=1e-5, atol=1e-6)
+
+ lagrange = rng.standard_normal(3)
+ H = _assemble_hessian(problem, u, 1.0, lagrange, n=3)
+ H_num = _numerical_lagrangian_hessian(problem, u, 1.0, lagrange, m=3)
+ np.testing.assert_allclose(H, H_num, rtol=1e-4, atol=1e-6)
+
+
+# ---------------------------------------------------------------------------
+# Parameter update flow
+# 
--------------------------------------------------------------------------- + +def test_problem_parameter_update(scope, rng): + x = scope.Variable(3, 1) + p = scope.Parameter(3, 1) + p.value = np.array([1.0, 2.0, 3.0]) + obj = sp.sum(sp.power(x - p, 2)) + problem = sp.Problem(obj, []) + + u = np.array([0.0, 0.0, 0.0]) + np.testing.assert_allclose(problem.objective_forward(u), 1.0 + 4.0 + 9.0) + + new_theta = np.array([4.0, 5.0, 6.0]) + problem.update_params(new_theta) + np.testing.assert_allclose(problem.objective_forward(u), 16.0 + 25.0 + 36.0) + + +def test_problem_parameter_initial_value_required(scope): + x = scope.Variable(3, 1) + p = scope.Parameter(3, 1) # value never set + obj = sp.sum(sp.power(x - p, 2)) + with pytest.raises(ValueError, match="has no value set"): + sp.Problem(obj, []) + + +# --------------------------------------------------------------------------- +# Error cases +# --------------------------------------------------------------------------- + +def test_problem_non_scalar_objective(scope): + x = scope.Variable(3, 1) + with pytest.raises(ValueError, match="scalar"): + sp.Problem(x, []) + + +def test_problem_cross_scope_variables(): + s1 = sp.Scope() + s2 = sp.Scope() + x = s1.Variable(2, 1) + y = s2.Variable(2, 1) + obj = sp.sum(sp.power(x, 2)) + sp.sum(sp.power(y, 2)) + with pytest.raises(ValueError, match="same Scope"): + sp.Problem(obj, []) + + +def test_problem_wrong_length_lagrange(scope): + x = scope.Variable(2, 1) + obj = sp.sum(sp.power(x, 2)) + c = sp.exp(x) # 2 constraints + problem = sp.Problem(obj, [c]) + problem.init_jacobian_coo() + problem.init_hessian_coo_lower_tri() + problem.objective_forward(np.zeros(2)) + problem.constraint_forward(np.zeros(2)) + problem.eval_jacobian_vals() + + with pytest.raises(ValueError, match="lagrange length"): + problem.eval_hessian_vals_coo_lower_tri(1.0, np.zeros(5)) + + +def test_problem_no_variables(): + """A constant-only objective has no Variables — must raise.""" + from 
sparsediffpy._core._constants import Constant + obj = Constant(np.array([[5.0]]), shape=(1, 1)) + with pytest.raises(ValueError, match="at least one Variable"): + sp.Problem(obj, []) From 4ea3383b7d3bbd16792fcc2ef0a84e93221b50fc Mon Sep 17 00:00:00 2001 From: dance858 Date: Mon, 11 May 2026 12:41:30 +0200 Subject: [PATCH 17/17] edits --- pyproject.toml | 2 +- sparsediffpy/__init__.py | 2 +- sparsediffpy/_core/_compile.py | 26 ++++++++++++++++++++++++ sparsediffpy/_core/_fn_affine.py | 34 +++++++++++++++++++++++++++++++- 4 files changed, 61 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index ab09d96..260c1dd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "scikit_build_core.build" [project] name = "sparsediffpy" -version = "0.3.0" +version = "0.2.9" description = "Python bindings for SparseDiffEngine automatic differentiation" requires-python = ">=3.11" dependencies = ["numpy >= 2.0.0"] diff --git a/sparsediffpy/__init__.py b/sparsediffpy/__init__.py index 6a4b0be..cd342b8 100644 --- a/sparsediffpy/__init__.py +++ b/sparsediffpy/__init__.py @@ -35,7 +35,7 @@ # Affine / structural functions from sparsediffpy._core._fn_affine import ( # noqa: F401 - diag_vec, trace, reshape, sum, prod, hstack, vstack, + broadcast, diag_vec, hstack, index_flat, prod, reshape, sum, trace, vstack, ) # Bivariate / special functions diff --git a/sparsediffpy/_core/_compile.py b/sparsediffpy/_core/_compile.py index dca986a..a845619 100644 --- a/sparsediffpy/_core/_compile.py +++ b/sparsediffpy/_core/_compile.py @@ -297,6 +297,32 @@ def _ensure_jacobian_initialized(self): _C.expr_init_jacobian(self._expr) self._jacobian_initialized = True + def init_jacobian(self): + """Pre-compute the Jacobian sparsity pattern. + + Optional: jacobian() will call this lazily on first use. 
Calling it + explicitly lets you pay the sparsity-analysis cost up front, which is + useful when you want predictable per-iteration latency in a solver + loop or when benchmarking the eval phase in isolation. + + Idempotent. + """ + if not self._jacobian_initialized: + _C.expr_init_jacobian(self._expr) + self._jacobian_initialized = True + + def init_hessian(self): + """Pre-compute the Hessian sparsity pattern. + + Optional: hessian() will call this lazily on first use. See + init_jacobian() for when to call this explicitly. + + Idempotent. + """ + if not self._hessian_initialized: + _C.expr_init_hessian(self._expr) + self._hessian_initialized = True + def forward(self): """Evaluate the expression at the current variable values. diff --git a/sparsediffpy/_core/_fn_affine.py b/sparsediffpy/_core/_fn_affine.py index 901dbfe..d6af4ea 100644 --- a/sparsediffpy/_core/_fn_affine.py +++ b/sparsediffpy/_core/_fn_affine.py @@ -2,9 +2,11 @@ import builtins as _builtins +import numpy as np + from sparsediffpy._core._expression import _wrap_constant from sparsediffpy._core._nodes_affine import ( - DiagVec, HStack, Reshape, Sum, Trace, Transpose, + Broadcast, DiagVec, HStack, Index, Reshape, Sum, Trace, Transpose, ) from sparsediffpy._core._nodes_other import Prod, ProdAxisOne, ProdAxisZero from sparsediffpy._core._shapes import validate_shape @@ -23,6 +25,36 @@ def reshape(x, d1, d2): return Reshape(x, (d1, d2)) +def broadcast(x, shape): + """Broadcast a scalar or smaller-shaped expression to `shape`. + + If `x.shape == shape`, returns `x` unchanged. + """ + x = _wrap_constant(x) + shape = tuple(shape) + validate_shape(shape[0], shape[1]) + if x.shape == shape: + return x + return Broadcast(x, shape) + + +def index_flat(x, flat_indices, result_shape): + """Gather elements by pre-computed Fortran-flat indices into `x`. + + `flat_indices` is an array of column-major indices into `x` (treated as a + flat buffer of size d1*d2). `result_shape` is the 2-D shape of the output. 
+ """ + flat_indices = np.asarray(flat_indices, dtype=np.int32) + result_shape = tuple(result_shape) + validate_shape(result_shape[0], result_shape[1]) + if flat_indices.size != result_shape[0] * result_shape[1]: + raise ValueError( + f"flat_indices length {flat_indices.size} does not match " + f"result_shape {result_shape} (size {result_shape[0] * result_shape[1]})" + ) + return Index(x, flat_indices, result_shape) + + def sum(x, axis=None): """Sum reduction.