OSQP Fails to satisfy non-negativity constraints..
Hello,
I am solving the same instance with OSQP (jax implementation), and cvxpy (jax implementation) to make sure osqp works fine. So, I defined params of opt model (c, Q, A, b, G, h).. I attach my implementation of OSQP (the syntax is almost identical for cvxpy, so it should be consistent). Also, attaching solution and obj function values for comparison. As you can see, the solution obtained with OSQP has a lot of non-negligible negative values whereas I clearly defined non-negativity constraints with G1 and h1 (before concatenation).
Solution for OSQP:
[-0.02 0.24 -0.05 0.02 0.10 -0.11 0.07 5415.01 15512.61 6752.96 -0.08 15098.69 11017.92 13961.98 385.43 0.18 5304.95 21532.34 9044.19 31289.12 -0.03 0.06 10656.31 4041.63 6682.12 -0.18 10387.38 -0.92 3698.50 16051.65 0.04 16312.80 5336.27 8732.68 5336.98 0.06 -0.04 4122.94 4814.06 -0.07 0.24 -0.54 0.58 1819.60 6699.22 0.12 29101.66 14844.65 4398.87 21889.62 12871.99 1227.76 6914.96 6527.67 0.19 10744.04 0.03 0.07 4831.43 0.01 13437.68 -0.03 0.04 422.84 0.00 0.58 -1.82 1.79 3045.95 0.20 10636.08 0.06 0.03 99.92 0.01 -1.09 3.65 -4.10 1634.20 -0.40 26940.69 0.02 -0.02 2119.65 0.00 10858.76 0.01 -0.01 1838.44 0.01 0.74 -2.37 2.60 2331.72 0.26 11084.22 0.06 -0.06 5008.07 0.00 0.08 0.24 -0.03 0.01 -0.09 0.09 7040.50 3704.10 0.08 4831.09 0.09 8804.63 4633.46 0.07 422.52 0.10 1.44 -1.54 0.73 3046.04 0.09 5045.05 5591.34 0.08 99.62 0.09 -2.97 3.19 -1.33 1633.23 0.09 14742.62 12198.16 0.07 2119.39 0.09 4482.16 6376.60 0.06 1838.21 0.09 1.83 -2.02 0.94 2331.96 0.09 96.95 10987.13 0.07 5007.86 -0.02 -0.03 0.07 -0.06 0.04 10744.20 3703.86 -0.18 4831.25 0.04 13437.81 4633.28 -0.17 422.68 0.04 -1.74 2.80 -2.17 3046.78 -0.11 10636.15 5591.21 -0.14 99.77 0.04 3.90 -6.25 4.61 1632.05 0.38 26940.71 12198.13 -0.11 2119.54 0.04 10858.76 6376.59 -0.08 1838.37 0.04 -2.43 3.88 -2.88 2332.91 -0.17 11084.16 10987.20 -0.07 5008.00 0.04 116418.04]
Solution of cvxpy:
[0.00 0.00 0.00 0.04 0.27 0.03 0.00 5415.08 18942.58 6753.29 0.03 15098.47 11017.95 14030.04 385.71 0.01 5304.87 21532.08 8788.34 31289.34 0.02 0.03 10656.35 3673.64 6682.34 0.00 10386.79 0.03 3109.93 16051.92 0.01 16312.83 5336.26 8348.06 5337.12 0.00 0.00 4122.96 4570.15 0.01 0.01 0.03 0.08 903.69 6699.25 0.00 29101.87 14844.60 3666.43 21889.65 12709.02 5236.43 6531.30 2131.91 0.13 10743.57 0.00 0.00 1401.48 0.00 13437.30 0.00 0.00 354.78 0.00 0.01 0.02 0.03 3302.27 0.00 10635.82 0.03 0.00 467.88 0.00 0.00 0.01 0.04 2221.65 0.00 26940.58 0.00 0.00 2504.22 0.00 10858.72 0.00 0.01 2082.27 0.01 0.00 0.02 0.07 3248.23 0.00 11084.32 0.00 0.01 5740.42 0.01 0.00 0.00 0.00 0.04 0.14 0.00 6306.34 4436.85 0.00 1402.15 0.00 8925.21 4511.78 0.00 355.35 0.00 0.00 0.00 0.00 3302.72 0.00 6239.56 4396.09 0.00 468.24 0.00 0.00 0.00 0.00 2221.89 0.00 15053.96 11886.52 0.00 2504.36 0.00 3592.23 7266.47 0.01 2082.30 0.00 0.01 0.01 0.02 3248.17 0.00 92.65 10991.75 0.03 5740.25 0.00 0.00 0.00 0.01 0.00 10743.51 4437.06 0.13 1401.85 0.00 13437.24 4511.96 0.11 355.10 0.00 0.00 0.01 0.06 3302.50 0.00 10635.79 4396.21 0.07 468.08 0.00 0.00 0.00 0.04 2221.77 0.00 26940.56 11886.58 0.03 2504.30 0.00 10858.72 7266.49 0.01 2082.28 0.00 0.00 0.00 0.01 3248.20 0.01 11084.34 10991.71 0.00 5740.32 0.03 116414.62]
Objective function values for the same instance solved with SCIP (pyscipopt), OSQP (jaxopt), and cvxpy (jaxopt), respectively: 90220323.21, 90220030.0, 90220280.0
Code:
import numpy as np
def generate_instance(instance_id):
np.random.seed(instance_id)
instance_data = {}
T = 5
S = 1
J = 10
# Dimensions for the 2D deposit
#width, depth = 25, 5 # 2D deposit (width x depth)
width, depth = 15, 4 # 2D deposit (width x depth)
I = width * depth
# Map block IDs to 2D coordinates
block_to_coords = {i: (i % width, i // width) for i in range(I)}
coords_to_block = {v: k for k, v in block_to_coords.items()}
# Function to calculate overlying blocks for 2D (3 neighbors)
def get_overlying_blocks(x, y, width, depth):
overlying_blocks = []
# Check if the block has neighbors above it
if y + 1 < depth:
# Only look for neighbors in the left, center, and right positions
for dx in [-1, 0, 1]: # Check left, right, and current x
if 0 <= x + dx < width: # Ensure the x-coordinate is within bounds
overlying_blocks.append(coords_to_block[(x + dx, y + 1)])
# If there are fewer than 3 overlying blocks but at least 1, add the block ID itself
if len(overlying_blocks) < 3 and len(overlying_blocks) > 0:
overlying_blocks.append(coords_to_block[(x, y)]) # Add itself as an overlying block
return overlying_blocks
# Generate O dictionary
O = {i: get_overlying_blocks(*block_to_coords[i], width, depth) for i in range(I)}
# Generate block-level values
ExpectedGrade = np.array([0.001 + 0.02 * (j - 1) for j in range(1, J + 1)])
block_grade = np.random.uniform(ExpectedGrade[0], ExpectedGrade[-1]+(ExpectedGrade[2] - ExpectedGrade[0]), I)
block_tonnage = np.clip(np.random.normal(10800, 150, size=I), 0, None)
MiningCosts = np.clip(np.random.normal(2.3, 1.5, I), 0, None)
# Generate time-dependent values
CommodityPrice = np.clip(np.random.normal(3000, 500, (T, S)), 0, None)
# Generate scalar values
ProcesCosts = np.clip(np.random.normal(15, 10), 0, None)
HoldingCosts = np.clip(np.random.normal(0.2, 0.05), 0, None)
RehandlingCosts = np.clip(np.random.normal(1, 0.05), 0, None)
ExpandingProcessingCapCosts = np.clip(np.random.normal(50, 10), 0, None)
MiningCapacity = np.clip(np.random.normal(6500, 500), 0, None)
MiningRateShouldNotFluctuateTooMuch = np.random.normal(0.1, 0.05) # Some penalty factor
# Generate min and max grade constraints
MinGrade = np.random.uniform(0, ExpectedGrade[round(J/2)])
MaxGrade = np.random.uniform(ExpectedGrade[round(J/2)], ExpectedGrade[-1])
gamma = 0.1
discount_rate = np.array([1 / (1 + gamma) ** t for t in range(1, T + 1)])
# Initialize q matrix
q = np.zeros((I, J))
# Assign tonnage to the correct type based on ranges
for i in range(I):
if block_grade[i] >= ExpectedGrade[-1]: # If grade is larger than last ExpectedGrade
q[i, J-1] = block_tonnage[i]
else:
for j in range(J-1): # Check bins for grades within expected ranges
if ExpectedGrade[j] <= block_grade[i] < ExpectedGrade[j+1]:
q[i, j] = block_tonnage[i]
break # Assign to the first matching type
instance_data.update({
'T': T,
'S': S,
'I': I,
'J': J,
'O': O,
'MiningCosts': MiningCosts,
'CommodityPrice': CommodityPrice,
'ProcesCosts': ProcesCosts,
'HoldingCosts': HoldingCosts,
'RehandlingCosts': RehandlingCosts,
'ExpandingProcessingCapCosts': ExpandingProcessingCapCosts,
'MiningCapacity': MiningCapacity,
'MiningRateShouldNotFluctuateTooMuch': MiningRateShouldNotFluctuateTooMuch,
'MinGrade': MinGrade,
'MaxGrade': MaxGrade,
'discount_rate': discount_rate,
'ExpectedGrade': ExpectedGrade,
'q': q,
'block_grade': block_grade,
'block_tonnage': block_tonnage
})
return instance_data
instance_data = generate_instance(156)
T, S, I, J, O, discount_rate, q, MiningCosts, CommodityPrice, ExpectedGrade, ProcesCosts, HoldingCosts, RehandlingCosts, ExpandingProcessingCapCosts,MiningCapacity, MiningRateShouldNotFluctuateTooMuch, MinGrade, MaxGrade, block_grade, block_tonnage = instance_data['T'], instance_data['S'], instance_data['I'], instance_data['J'], instance_data['O'], instance_data['discount_rate'], instance_data['q'], instance_data['MiningCosts'], instance_data['CommodityPrice'], instance_data['ExpectedGrade'], instance_data['ProcesCosts'], instance_data['HoldingCosts'], instance_data['RehandlingCosts'], instance_data['ExpandingProcessingCapCosts'], instance_data['MiningCapacity'], instance_data['MiningRateShouldNotFluctuateTooMuch'], instance_data['MinGrade'], instance_data['MaxGrade'], instance_data['block_grade'], instance_data['block_tonnage']
regularization = 0.01
block_score_at_time_t = np.random.uniform(0, 1, size=(I, T))
#@jax.jit
def solve_qp_osqp2_pure(y, discount_rate, q, MiningCosts, CommodityPrice, ExpectedGrade, ProcesCosts, HoldingCosts, RehandlingCosts, ExpandingProcessingCapCosts, MinGrade, MaxGrade, regularization):
T = 5
S = 1
I = 60
J = 10
# Total number of variables
n = 4 * J * T * S + 1
N = J * T * S # Total number of elements in one block: 3*3*1 = 9
# Q matrix (diagonal)
Q_diag = jnp.concatenate([jnp.full(4 * J * T * S, 2 * regularization / S), jnp.array([2 * regularization])])
Q = jnp.diag(Q_diag)
# c vector
c_xMP = jnp.array([-(1/S) * discount_rate[t] * (CommodityPrice[t,s] * ExpectedGrade[j] - ProcesCosts)
for j in range(J) for t in range(T) for s in range(S)])
c_xMH = jnp.zeros(J * T * S)
c_xHP = jnp.array([-(1/S) * discount_rate[t] * (CommodityPrice[t,s] * ExpectedGrade[j] - ProcesCosts - RehandlingCosts)
for j in range(J) for t in range(T) for s in range(S)])
c_xH = jnp.array([(1/S) * discount_rate[t] * HoldingCosts for j in range(J) for t in range(T) for s in range(S)])
c_xP = jnp.array([ExpandingProcessingCapCosts])
c = jnp.concatenate([c_xMP, c_xMH, c_xHP, c_xH, c_xP])
# all variables >= 0
G1 = -jnp.eye(n)
h1 = jnp.zeros(n)
#all variables <=1
G2 = jnp.eye(n)
h2 = jnp.ones(n)
# Linking constraint (G3 and h3)
G3_rows = []
h3_values = []
# Create G3 and h3 based on the linking constraint
for j in range(J):
for t in range(T):
for s in range(S):
row = jnp.zeros(n)
# Indices for xMP and xMH
idx_xMP = j * T * S + t * S + s
idx_xMH = J * T * S + j * T * S + t * S + s
row = row.at[idx_xMP].set(1) # Coefficient for xMP[j,t,s]
row = row.at[idx_xMH].set(1) # Coefficient for xMH[j,t,s]
# Sum of q[i,j] * y[i,t]
rhs_value = jnp.sum(q[:, j] * y[:, t]) # Sum over all i for given j and t
G3_rows.append(row)
h3_values.append(rhs_value)
# Convert to matrices
G3 = jnp.stack(G3_rows) # Shape should be (J*T*S, n)
h3 = jnp.array(h3_values) # Shape should be (J*T*S,)
# Stockpiles linking constraint (G4 and h4)
h4_values = []
G4 = jnp.zeros((J*S, n))
# Compute block offsets for the decision vector x
# Order in x: [xMP, xMH, xHP, xH, xP]
offset_xMP = 0
offset_xMH = N
offset_xHP = 2 * N
offset_xH = 3 * N
# xP is at index 4*N and does not appear in the constraint
# Create G4 and h4 based on the stockpile linking constraint
# Build the constraints for t = 0 for each (j, s)
row = 0
for j in range(J):
for s in range(S):
t = 0 # Only t = 0 is used in the constraint.
# Flatten index using ordering (j, t, s)
idx = j * (T * S) + t * S + s
# For the constraint: xH[j,0,s] - xMH[j,0,s] <= 0,
# we assign a coefficient -1 for xMH[j,0,s] and +1 for xH[j,0,s].
col_xMH = offset_xMH + idx # Position in xMH block
col_xH = offset_xH + idx # Position in xH block
G4 = G4.at[row, col_xMH].set(-1)
G4 = G4.at[row, col_xH].set(1)
row += 1
h4_values.append(0) # Right-hand side is 0 for the stockpile constraint
# Convert to matrices
h4 = jnp.array(h4_values) # Shape should be (J*S,)
# End-of-period stockpile quantity constraint (G5 and h5)
G5_rows = []
h5_values = []
for j in range(J):
for s in range(S):
for t in range(1, T): # t starts from 1
row = jnp.zeros(n)
# Indices for variables
idx_xH_t = 3 * J * T * S + j * T * S + t * S + s # xH[j, t, s]
idx_xH_t_prev = 3 * J * T * S + j * T * S + (t-1) * S + s # xH[j, t-1, s]
idx_xMH = J * T * S + j * T * S + t * S + s # xMH[j, t, s]
idx_xHP = 2 * J * T * S + j * T * S + t * S + s # xHP[j, t, s]
# Set coefficients: xH[j, t, s] - xH[j, t-1, s] - xMH[j, t, s] + xHP[j, t, s] <= 0
row = row.at[idx_xH_t].set(1) # Coefficient for xH[j, t, s]
row = row.at[idx_xH_t_prev].set(-1) # Coefficient for xH[j, t-1, s]
row = row.at[idx_xMH].set(-1) # Coefficient for xMH[j, t, s]
row = row.at[idx_xHP].set(1) # Coefficient for xHP[j, t, s]
G5_rows.append(row)
h5_values.append(0) # Right-hand side is 0
# Convert to matrices
G5 = jnp.stack(G5_rows) # Shape should be (J * S * (T-1), n)
h5 = jnp.array(h5_values) # Shape should be (J * S * (T-1),)
#Processing caacity constraint
# Initialize G as a jnp array of zeros
G6 = jnp.zeros((T*S, n))
h6_values = []
# Block offsets in the decision variable vector x
offset_xMP = 0 # xMP: indices 0 to N-1
offset_xMH = N # xMH: indices N to 2N-1
offset_xHP = 2 * N # xHP: indices 2N to 3N-1
offset_xH = 3 * N # xH: indices 3N to 4N-1
offset_xP = 4 * N # xP: index 4N
# Build constraints: for each (t, s) add sum_j (xMP[j,t,s] + xHP[j,t,s]) - xP <= 0.
row = 0
for t in range(T):
for s in range(S):
# Loop over j to add contributions from xMP and xHP.
for j in range(J):
# Flatten index for (j, t, s) using ordering (j,t,s)
idx = j * (T * S) + t * S + s
# For xMP, add coefficient +1.
G6 = G6.at[row, offset_xMP + idx].add(1)
# For xHP, add coefficient +1.
G6 = G6.at[row, offset_xHP + idx].add(1)
# For xP, add coefficient -1.
G6 = G6.at[row, offset_xP].set(-1)
row += 1
h6_values.append(0)
h6 = jnp.array(h6_values)
#Blending constraint #1
# Initialize the G matrix as a jnp array of zeros.
G7 = jnp.zeros((T*S, n))
h7_values = []
# Compute coefficient for each j: (ExpectedGrade[j] - MaxGrade)
coeffs = ExpectedGrade - MaxGrade
# Build constraints: For each (t, s) we impose
# sum_{j} ( (ExpectedGrade[j] - MaxGrade) * (xMP[j,t,s] + xHP[j,t,s]) ) <= 0.
row = 0
for t in range(T):
for s in range(S):
for j in range(J):
# Compute flattened index for (j, t, s) using order (j,t,s)
idx = j * (T * S) + t * S + s
# For xMP block: add the coefficient.
G7 = G7.at[row, offset_xMP + idx].set(coeffs[j])
# For xHP block: add the same coefficient.
G7 = G7.at[row, offset_xHP + idx].set(coeffs[j])
row += 1
h7_values.append(0)
h7 = jnp.array(h7_values)
# Initialize the G matrix as a jnp array of zeros.
G8 = jnp.zeros((S*T, n))
h8 = jnp.zeros(S*T)
# Compute coefficient for each j: (MinGrade - ExpectedGrade[j])
coeffs2 = MinGrade - ExpectedGrade # This yields an array like [ -0.2, -0.3, -0.4 ] if using the example values.
# Build constraints: For each (t, s) we impose
# sum_{j=0}^{J-1} [ (MinGrade - ExpectedGrade[j]) * (xMP[j,t,s] + xHP[j,t,s]) ] <= 0.
row = 0
for t in range(T):
for s in range(S):
for j in range(J):
# Compute flattened index for (j, t, s) using the ordering (j,t,s)
idx = j * (T * S) + t * S + s
# In the xMP block, add coefficient (MinGrade - ExpectedGrade[j])
G8 = G8.at[row, offset_xMP + idx].set(coeffs2[j])
# In the xHP block, add the same coefficient
G8 = G8.at[row, offset_xHP + idx].set(coeffs2[j])
row += 1
# Initialize lists to collect rows and right-hand side values
A_rows = []
b_values = []
# Loop over all j and s to define the constraint for xHP[j,0,s] == 0
for j in range(J):
for s in range(S):
# Create a row of zeros of length n (total number of variables)
row = jnp.zeros(n)
# Compute the index for xHP[j, 0, s]
idx_xHP0 = (2 * J * T * S) + j * T * S + s # Note: t=0
# Set the coefficient for xHP[j,0,s] to 1
row = row.at[idx_xHP0].set(1)
A_rows.append(row)
b_values.append(0) # Right-hand side is 0
# Convert lists to matrices/vectors
A = jnp.stack(A_rows) # Shape: (J * S, n)
b = jnp.array(b_values) # Shape: (J * S,)
G = jnp.vstack([G1, G3, G4, G5, G6, G7, G8])
h = jnp.concatenate([h1, h3, h4, h5, h6, h7, h8])
# Solve
qp = OSQP(tol=1e-10)
#qp = CvxpyQP(solver='SCS') #solver = 'OSQP'
#init_params = jnp.zeros(n)
#sol = qp.run(init_params=init_params, params_obj=(Q, c), params_eq=(A, b), params_ineq=(G, h)).params
sol = qp.run(params_obj=(Q, c), params_eq=(A, b), params_ineq=(G, h)).params
x_opt = sol.primal
#x_opt = jnp.maximum(x_opt, 0)
dual_eq = sol.dual_eq
#dual_eq = jnp.maximum(dual_eq, 0)
dual_ineq = sol.dual_ineq
#dual_ineq = jnp.maximum(dual_ineq, 0)
c_dual = jnp.concatenate([b, h])
z_dual = jnp.concatenate([dual_eq, dual_ineq])
#obj_value = -(0.5 * x_opt.T @ Q @ x_opt + c.T @ x_opt)
obj_value = -(0.5 * jnp.dot(x_opt.T, jnp.dot(Q, x_opt)) + jnp.dot(c.T, x_opt))
obj_value_dual = jnp.dot(c_dual.T, z_dual)
return x_opt, obj_value, obj_value_dual
Hello @LidiiaS,
Thanks for the issue and the reproduction code. Unfortunately, Jaxopt is archived.