Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/example.rst
Original file line number Diff line number Diff line change
Expand Up @@ -32,3 +32,4 @@ Example Gallery
examples/GridSearchCV_reg_losses.ipynb

examples/ElasticNet.ipynb
examples/Adaptive_ElasticNet.ipynb
201 changes: 201 additions & 0 deletions doc/source/examples/Adaptive_ElasticNet.ipynb

Large diffs are not rendered by default.

32 changes: 17 additions & 15 deletions rehline/_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,7 @@ def call_ReLHLoss(self, score):
if self.H > 0:
rehu_input = (self._S.T * score[:, np.newaxis]).T + self._T
return np.sum(_relu(relu_input), 0) + np.sum(_rehu(rehu_input, self._Tau), 0)

@abstractmethod
def fit(self, X, y, sample_weight):
"""Fit model."""
Expand Down Expand Up @@ -286,7 +286,7 @@ def ReHLine_solver(
T=None,
A=None,
b=None,
rho=0.0,
rho=None,
Lambda=None,
Gamma=None,
xi=None,
Expand All @@ -307,6 +307,8 @@ def ReHLine_solver(
A = np.empty(shape=(0, 0))
if b is None:
b = np.empty(shape=(0))
if rho is None:
rho = np.empty(shape=(0))
if Lambda is None:
Lambda = np.empty(shape=(0, 0))
if Gamma is None:
Expand All @@ -330,14 +332,14 @@ def ReHLine_solver(
X,
A,
b,
rho,
U,
V,
S,
T,
Tau,
max_iter,
tol,
rho,
shrink,
verbose,
trace_freq,
Expand Down Expand Up @@ -773,32 +775,32 @@ def _cast_sample_weight(U, V, Tau, S, T, C=1.0, sample_weight=None):
# U_new = np.zeros((self.L+2, n+d))
# V_new = np.zeros((self.L+2, n+d))
# ## Block 1
# if len(self.U):
# U_new[:self.L, :n] = self.U
# V_new[:self.L, :n] = self.V
# if len(self._U):
# U_new[:self.L, :n] = self._U
# V_new[:self.L, :n] = self._V
# ## Block 2
# U_new[-2,n:] = l1_pen
# U_new[-1,n:] = -l1_pen

# if len(self.S):
# if len(self._S):
# S_new = np.zeros((self.H, n+d))
# T_new = np.zeros((self.H, n+d))
# Tau_new = np.zeros((self.H, n+d))

# S_new[:,:n] = self.S
# T_new[:,:n] = self.T
# Tau_new[:,:n] = self.Tau
# S_new[:,:n] = self._S
# T_new[:,:n] = self._T
# Tau_new[:,:n] = self._Tau

# self.S = S_new
# self.T = T_new
# self.Tau = Tau_new
# self._S = S_new
# self._T = T_new
# self._Tau = Tau_new

# ## fake X
# X_fake = np.zeros((n+d, d))
# X_fake[:n,:] = X
# X_fake[n:,:] = np.identity(d)

# self.U = U_new
# self.V = V_new
# self._U = U_new
# self._V = V_new
# self.auto_shape()
# return X_fake
27 changes: 25 additions & 2 deletions rehline/_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ class plqERM_ElasticNet(_BaseReHLine, BaseEstimator):

.. math::

\min_{\mathbf{\beta} \in \mathbb{R}^d} C \sum_{i=1}^n \text{PLQ}(y_i, \mathbf{x}_i^T \mathbf{\beta}) + \text{l1_ratio} \| \mathbf{\beta} \|_1 + \frac{1}{2} (1 - \text{l1_ratio}) \| \mathbf{\beta} \|_2^2, \ \text{ s.t. } \
\min_{\mathbf{\beta} \in \mathbb{R}^d} C \sum_{i=1}^n \text{PLQ}(y_i, \mathbf{x}_i^T \mathbf{\beta}) + \text{l1_ratio} \sum_{j=1}^d \omega_j | \beta_j | + \frac{1}{2} (1 - \text{l1_ratio}) \| \mathbf{\beta} \|_2^2, \ \text{ s.t. } \
\mathbf{A} \mathbf{\beta} + \mathbf{b} \geq \mathbf{0},

The function supports various loss functions, including:
Expand Down Expand Up @@ -527,6 +527,9 @@ class plqERM_ElasticNet(_BaseReHLine, BaseEstimator):
The ElasticNet mixing parameter, with 0 <= l1_ratio < 1. For l1_ratio = 0 the penalty
is an L2 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.

omega : array of shape (n_features,), default=None
    Per-feature weight coefficients for the adaptive lasso (adaptive ElasticNet) penalty. If None, uniform weights are used.

verbose : int, default=0
Enable verbose output. Note that this setting takes advantage of a
per-process runtime setting in liblinear that, if enabled, may not work
Expand Down Expand Up @@ -584,6 +587,7 @@ def __init__(
constraint=None,
C=1.0,
l1_ratio=0.5,
omega=None,
U=None,
V=None,
Tau=None,
Expand All @@ -602,8 +606,8 @@ def __init__(
self.constraint = constraint if constraint is not None else []
self.C = C
self.l1_ratio = l1_ratio
self.rho = l1_ratio / (1 - l1_ratio)
self.C_eff = C / (1 - l1_ratio)
self.omega = omega if omega is not None else np.empty(shape=(0, 0))
self._U = U if U is not None else np.empty(shape=(0, 0))
self._V = V if V is not None else np.empty(shape=(0, 0))
self._S = S if S is not None else np.empty(shape=(0, 0))
Expand Down Expand Up @@ -678,6 +682,25 @@ def fit(self, X, y, sample_weight=None):
self._xi = np.empty(shape=(0, 0))
self._mu = np.empty(shape=(0, 0))

if self.l1_ratio == 0:
self.rho = None
if self.omega.size > 0:
warnings.warn(
f"Omega will be ignored since l1_ratio=0.",
UserWarning,
stacklevel=2
)
else:
if self.omega.size not in (0, d):
raise ValueError(
f"Omega length {self.omega.size} must be 0 or {d} (n_features)"
)
if not np.all(self.omega > 0):
raise ValueError(
"All elements in omega must be strictly positive."
)
self.rho = np.full(d, self.l1_ratio / (1 - self.l1_ratio)) * (self.omega if self.omega.size == d else 1.0)

result = ReHLine_solver(
X=X,
U=U_weight,
Expand Down
2 changes: 1 addition & 1 deletion rehline/_internal.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,14 @@ def rehline_internal(
X: npt.NDArray[np.float64],
A: npt.NDArray[np.float64],
b: npt.NDArray[np.float64],
rho: npt.NDArray[np.float64],
U: npt.NDArray[np.float64],
V: npt.NDArray[np.float64],
S: npt.NDArray[np.float64],
T: npt.NDArray[np.float64],
Tau: npt.NDArray[np.float64],
max_iter: int,
tol: float,
rho: float = ...,
shrink: int = ...,
verbose: int = ...,
trace_freq: int = ...,
Expand Down
8 changes: 4 additions & 4 deletions src/rehline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,15 @@ using ReHLineResult = rehline::ReHLineResult<Matrix>;

void rehline_internal(
ReHLineResult& result,
const MapMat& X, const MapMat& A, const MapVec& b,
const MapMat& X, const MapMat& A, const MapVec& b, const MapVec& rho,
const MapMat& U, const MapMat& V,
const MapMat& S, const MapMat& T, const MapMat& Tau,
int max_iter, double tol, double rho = 0.0, int shrink = 1,
int max_iter, double tol, int shrink = 1,
int verbose = 0, int trace_freq = 100
)
{
rehline::rehline_solver(result, X, A, b, U, V, S, T, Tau,
max_iter, tol, rho, shrink, verbose, trace_freq);
rehline::rehline_solver(result, X, A, b, rho, U, V, S, T, Tau,
max_iter, tol, shrink, verbose, trace_freq);
}

PYBIND11_MODULE(_internal, m) {
Expand Down
63 changes: 34 additions & 29 deletions src/rehline.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ void reset_fv_set(std::vector<std::pair<Index, Index>>& fvset, std::size_t n, st
// * S, T, Tau: [H x n]
// * A : [K x d]
// * b : [K]
// * rho : [d]
// - Pre-computed
// * r: [n]
// * p: [K]
Expand Down Expand Up @@ -139,6 +140,7 @@ class ReHLineSolver
const Index m_L;
const Index m_H;
const Index m_K;
const Index m_W;

// Input matrices and vectors
RMatrix m_X;
Expand All @@ -149,7 +151,7 @@ class ReHLineSolver
ConstRefMat m_Tau;
RMatrix m_A;
ConstRefVec m_b;
Scalar m_rho; // l1_ratio / (1 - l1_ratio)
ConstRefVec m_rho;

// Pre-computed
Vector m_gk_denom; // ||a[k]||^2
Expand All @@ -174,7 +176,7 @@ class ReHLineSolver
// =================== Initialization functions =================== //

// Compute the primal variable beta from dual variables
// beta = A'xi - U3 * vec(Lambda) - S3 * vec(Gamma)
// beta = A'xi - U3 * vec(Lambda) - S3 * vec(Gamma) + 2 * Mu - rho
// A can be empty, one of U and V may be empty
inline void set_primal()
{
Expand All @@ -193,9 +195,9 @@ class ReHLineSolver
if (m_H > 0)
LHterm.noalias() += m_S.cwiseProduct(m_Gamma).colwise().sum().transpose();
m_beta.noalias() -= m_X.transpose() * LHterm;

if (m_rho > 0)
m_beta.noalias() += Scalar(2.0) * m_mu - m_rho * Vector::Ones(m_d);
// L1 related
if (m_W > 0)
m_beta.noalias() += Scalar(2.0) * m_mu - m_rho;
}

// =================== Evaluating objection function =================== //
Expand Down Expand Up @@ -224,7 +226,7 @@ class ReHLineSolver
// Quadratic term
result += Scalar(0.5) * m_beta.squaredNorm();
// L1 penalty term
result += m_rho * m_beta.template lpNorm<1>();
result += m_beta.cwiseAbs().cwiseProduct(m_rho).sum();
return result;
}

Expand All @@ -251,8 +253,8 @@ class ReHLineSolver
}
// 2 * Mu - rho, [d x 1]
Vector MuR = Vector::Zero(m_d);
if (m_rho > 0)
MuR = Scalar(2.0) * m_mu - m_rho * Vector::Ones(m_d);
if (m_W > 0)
MuR = Scalar(2.0) * m_mu - m_rho;
// Compute dual objective function value
Scalar obj = Scalar(0);
// If K = 0, all terms that depend on A, xi, or b will be zero
Expand All @@ -261,30 +263,31 @@ class ReHLineSolver
// 0.5 * ||Atxi||^2 - Atxi' * U3L - Atxi' * S3G + Atxi' MuR + xi' * b
const Scalar Atxi_U3L = (m_L > 0) ? (Atxi.dot(U3L)) : Scalar(0);
const Scalar Atxi_S3G = (m_H > 0) ? (Atxi.dot(S3G)) : Scalar(0);
const Scalar Atxi_MuR = (m_rho > 0) ? (Atxi.dot(MuR)) : Scalar(0);
const Scalar Atxi_MuR = (m_W > 0) ? (Atxi.dot(MuR)) : Scalar(0);
obj += Scalar(0.5) * Atxi.squaredNorm() - Atxi_U3L - Atxi_S3G + Atxi_MuR + m_xi.dot(m_b);
}
// If L = 0, all terms that depend on U, V, or Lambda will be zero
if (m_L > 0)
{
// 0.5 * ||U3L||^2 + U3L' * S3G - U3L' * MuR - tr(Lambda * V')
const Scalar U3L_S3G = (m_H > 0) ? (U3L.dot(S3G)) : Scalar(0);
const Scalar U3L_MuR = (m_rho > 0) ? (U3L.dot(MuR)) : Scalar(0);
const Scalar U3L_MuR = (m_W > 0) ? (U3L.dot(MuR)) : Scalar(0);
obj += Scalar(0.5) * U3L.squaredNorm() + U3L_S3G - U3L_MuR -
m_Lambda.cwiseProduct(m_V).sum();
}
// If H = 0, all terms that depend on S, T, or Gamma will be zero
if (m_H > 0)
{
// 0.5 * ||S3G||^2 - S3G' * MuR + 0.5 * ||Gamma||^2 - tr(Gamma * T')
const Scalar S3G_MuR = (m_rho > 0) ? (S3G.dot(MuR)) : Scalar(0);
const Scalar S3G_MuR = (m_W > 0) ? (S3G.dot(MuR)) : Scalar(0);
obj += Scalar(0.5) * S3G.squaredNorm() - S3G_MuR + Scalar(0.5) * m_Gamma.squaredNorm() -
m_Gamma.cwiseProduct(m_T).sum();
}
// If rho = 0, all terms that depend on rho, or Mu will be zero
if (m_rho > 0)
obj += Scalar(2.0) * m_mu.squaredNorm() - Scalar(2.0) * m_rho * m_mu.sum() +
Scalar(0.5) * m_d * m_rho * m_rho;
// If W = 0, all terms that depend on rho or Mu will be zero
if (m_W > 0)
// 2.0 * ||Mu||^2 - 2.0 * rho' * Mu + 0.5 * ||rho||^2
obj += Scalar(2.0) * m_mu.squaredNorm() - Scalar(2.0) * m_rho.dot(m_mu) +
Scalar(0.5) * m_rho.squaredNorm();
return obj;
}

Expand Down Expand Up @@ -368,7 +371,7 @@ class ReHLineSolver
// Update mu and beta
inline void update_mu_beta()
{
if (m_rho <= 0)
if (m_W <= 0)
return;

// Save original Mu
Expand Down Expand Up @@ -598,7 +601,7 @@ class ReHLineSolver
// Overloaded version based on free variable set
inline void update_mu_beta(std::vector<Index>& fv_set, Scalar& min_pg, Scalar& max_pg)
{
if (m_rho <= 0)
if (m_W <= 0)
return;

// Permutation
Expand All @@ -618,12 +621,13 @@ class ReHLineSolver
for (auto j: fv_set)
{
const Scalar mu_j = m_mu[j];
const Scalar rho_j = m_rho[j];

// Compute g_j
const Scalar g_j = m_beta[j];
// PG and shrink
Scalar pg;
const bool shrink = pg_mu(mu_j, g_j, m_rho, lb, ub, pg);
const bool shrink = pg_mu(mu_j, g_j, rho_j, lb, ub, pg);
if (shrink)
continue;

Expand All @@ -632,7 +636,7 @@ class ReHLineSolver
min_pg = std::min(min_pg, pg);
// Compute new mu_j
const Scalar candid = mu_j - g_j * Scalar(0.5);
const Scalar newmu = std::max(Scalar(0), std::min(m_rho, candid));
const Scalar newmu = std::max(Scalar(0), std::min(rho_j, candid));
// Update mu and beta
m_mu[j] = newmu;
m_beta[j] += Scalar(2.0) * (newmu - mu_j);
Expand All @@ -647,8 +651,9 @@ class ReHLineSolver
ReHLineSolver(ConstRefMat X, ConstRefMat U, ConstRefMat V,
ConstRefMat S, ConstRefMat T, ConstRefMat Tau,
ConstRefMat A, ConstRefVec b,
Scalar rho) :
m_n(X.rows()), m_d(X.cols()), m_L(U.rows()), m_H(S.rows()), m_K(A.rows()),
ConstRefVec rho) :
m_n(X.rows()), m_d(X.cols()), m_L(U.rows()), m_H(S.rows()), m_K(A.rows()),
m_W(rho.rows()), // check if l1 penalty is implemented
m_X(X), m_U(U), m_V(V), m_S(S), m_T(T), m_Tau(Tau), m_A(A), m_b(b),
m_rho(rho),
m_gk_denom(m_K), m_gli_denom(m_L, m_n), m_ghi_denom(m_H, m_n),
Expand Down Expand Up @@ -691,10 +696,10 @@ class ReHLineSolver
// Gamma.fill(std::min(1.0, 0.5 * tau));
}

// Each element of Mu satisfies 0 <= mu_j <= rho,
// Each element of Mu satisfies 0 <= mu_j <= rho_j,
// and we use min(0.5 * rho_j, 1) to initialize (rho_j can be Inf)
if (m_rho > 0)
m_mu.setConstant( std::min(Scalar(0.5) * m_rho, Scalar(1)) );
if (m_W > 0)
m_mu.noalias() = (m_rho * Scalar(0.5)).cwiseMin(Scalar(1));

// Set primal variable based on duals
set_primal();
Expand Down Expand Up @@ -746,15 +751,15 @@ class ReHLineSolver
}


if (m_rho > 0)
if (m_W > 0)
{
// Check shape of warmstart parameters
if (mu_ws.size() != m_d){
throw std::invalid_argument("mu_ws must have size d");
}
// Check values of warmstart parameters
if ((mu_ws.array() < Scalar(0)).any() || (mu_ws.array() > m_rho).any()){
throw std::invalid_argument("mu_ws must be in [0, rho]");
if ((mu_ws.array() < Scalar(0)).any() || (mu_ws.array() > m_rho.array()).any()){
throw std::invalid_argument("mu_ws_j must be in [0, rho_j]");
}
m_mu = mu_ws;
}
Expand Down Expand Up @@ -928,10 +933,10 @@ template <typename DerivedMat, typename DerivedVec, typename Index = int>
void rehline_solver(
ReHLineResult<typename DerivedMat::PlainObject, Index>& result,
const Eigen::MatrixBase<DerivedMat>& X, const Eigen::MatrixBase<DerivedMat>& A,
const Eigen::MatrixBase<DerivedVec>& b,
const Eigen::MatrixBase<DerivedVec>& b, const Eigen::MatrixBase<DerivedVec>& rho,
const Eigen::MatrixBase<DerivedMat>& U, const Eigen::MatrixBase<DerivedMat>& V,
const Eigen::MatrixBase<DerivedMat>& S, const Eigen::MatrixBase<DerivedMat>& T, const Eigen::MatrixBase<DerivedMat>& Tau,
Index max_iter, typename DerivedMat::Scalar tol, typename DerivedMat::Scalar rho = 0,
Index max_iter, typename DerivedMat::Scalar tol,
Index shrink = 1, Index verbose = 0, Index trace_freq = 100,
std::ostream& cout = std::cout
)
Expand Down
Loading
Loading