inverse_rl
inverse_rl copied to clipboard
I cannot understand the linear programming part in the code
Can the auther give more explanation about the part of linear programming. The code is shown as below: ` def performInverseReinforcementLearning( policy, gamma=0.5, l1=10): trans_probs=constructTransitionMatrix() conditions=[] c = np.zeros([3 * 25])
for i in range(25):
optimalAction = policy[i]
tempTransProbMatrix= gamma * trans_probs[:, :, optimalAction]
test = np.identity(25)-tempTransProbMatrix
tempInverse = np.linalg.inv(np.identity(25) - tempTransProbMatrix)
print(tempInverse.shape)
for j in range(4):
if(j != optimalAction):
test_c = trans_probs[i, :, optimalAction]
print(test_c.shape)
condition= - np.dot(trans_probs[i, :, optimalAction] - trans_probs[i, :, j], tempInverse)
print(condition.shape)
conditions.append(condition)
print(len(conditions), len(condition[0]))
equality=np.zeros(625)
for i in range(25):
c[25:2 * 25] = -1
c[2 * 25:] = l1
print(c.shape)
print(c)
conditions=np.array(conditions)
print(conditions)
conditions=np.reshape(conditions,[625,75])
print(conditions)
print(len(c),conditions.shape)
rewards=linprog(c, A_ub=conditions, b_ub=equality)
#rewards = rewards/max(rewards)
return rewards`