deep-symbolic-optimization
Adjusting best R or using a custom best R
Is there a way to change the function that computes the R result, so I can verify it?
For example, I have a CSV with 1 input column and 1 target column, containing 20 examples... I would like to change the reward to something like this:
import csv

def sample_error(x, y_true):
    y_found = formula(x)          # "formula" stands for the expression DSO has found
    if y_found == 0:
        y_found = 100000000       # as in my original idea: replace 0 with a big value
    return abs(y_true - y_found)

total = 10000000000
with open("my-input-training.csv") as f:
    for x, y_true in csv.reader(f):
        total -= sample_error(float(x), float(y_true))

R = total / 100000000
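Or, to keep R in the same 0-to-1 range the console shows (instead of dividing by a magic number), the summed error could also be squashed like this (just a sketch, continuing from the code above):

# Alternative normalization: recover the summed error and squash it into (0, 1].
error_sum = 10000000000 - total        # total absolute error over all samples
R = 1.0 / (1.0 + error_sum)            # 1.0 only if every sample is matched exactly

That way a perfect formula would give R = 1.0, similar to the built-in metrics.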
[00:00:00:01.01] Training iteration 10, current best R: 0.9744
^
|
This is the "current best R" value from the console output.
Now we could tell the network to increase this custom reward function instead...
The reward over all values should then go to the generator, so that the target is to increase the reward across the whole dataset... With my current results, the reward is only good for 1 of the samples and far, far off for all the others...
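For what it's worth, this is roughly how I look at the per-sample errors against my CSV (best_expr below is just a placeholder, not the actual expression DSO found, and I assume the CSV has no header, with the input in the first column and the target in the last):

import numpy as np
import pandas as pd

def best_expr(x):
    # placeholder for the expression reported by DSO, not my real result
    return np.sin(x) + 1.0

data = pd.read_csv("/code/mydata/dataset/pico.csv", header=None)
x, y = data.iloc[:, 0].to_numpy(), data.iloc[:, 1].to_numpy()

errors = np.abs(y - best_expr(x))
for i, err in enumerate(errors):
    print(f"sample {i}: abs error = {err:.4g}")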
This is the JSON config I used for the training:
{
  "experiment": {
    "logdir": "/code/mydata/log",
    "exp_name": "Picoprimer4",
    "seed": 40000,
    "task_name": "Picoprimer4",
    "save_path": "/code/mydata/log/pico4"
  },
  "task": {
    "task_type": "regression",
    "dataset": "/code/mydata/dataset/pico.csv",
    "function_set": [
      "add",
      "sub",
      "mul",
      "div",
      "sin",
      "cos",
      "tan",
      "exp",
      "log",
      "sqrt",
      "neg",
      "abs",
      "tanh",
      "div",
      1.0,
      10.0,
      3.14159265359,
      1.618033988749894,
      2.71828,
      0.27182,
      25.132741229,
      8.0,
      4.0,
      2.0,
      0.5
    ],
    "metric": "inv_nrmse",
    "metric_params": [
      1.0
    ],
    "extra_metric_test": null,
    "extra_metric_test_params": [],
    "threshold": 1e-12,
    "protected": false,
    "reward_noise": 0.0,
    "reward_noise_type": "r",
    "normalize_variance": false,
    "decision_tree_threshold_set": [],
    "poly_optimizer_params": {
      "degree": 3,
      "coef_tol": 1e-06,
      "regressor": "dso_least_squares",
      "regressor_params": {
        "cutoff_p_value": 1.0,
        "n_max_terms": null,
        "coef_tol": 1e-06
      }
    }
  },
  "training": {
    "n_samples": 2000000,
    "batch_size": 1000,
    "epsilon": 0.05,
    "baseline": "R_e",
    "alpha": 0.5,
    "b_jumpstart": false,
    "n_cores_batch": 1,
    "complexity": "token",
    "const_optimizer": "scipy",
    "const_params": {
      "method": "L-BFGS-B",
      "options": {
        "gtol": 0.001
      }
    },
    "verbose": true,
    "debug": 0,
    "early_stopping": false,
    "use_memory": true,
    "memory_capacity": 10000.0,
    "warm_start": null,
    "memory_threshold": null
  },
  "logging": {
    "save_all_iterations": false,
    "save_summary": false,
    "save_positional_entropy": false,
    "save_pareto_front": true,
    "save_cache": false,
    "save_cache_r_min": 0.9,
    "save_freq": 1,
    "save_token_count": false,
    "hof": 100
  },
  "state_manager": {
    "type": "hierarchical",
    "observe_action": false,
    "observe_parent": true,
    "observe_sibling": true,
    "observe_dangling": false,
    "embedding": false,
    "embedding_size": 8
  },
  "policy": {
    "policy_type": "rnn",
    "max_length": 64,
    "cell": "lstm",
    "num_layers": 1,
    "num_units": 32,
    "initializer": "zeros"
  },
  "policy_optimizer": {
    "policy_optimizer_type": "pg",
    "summary": false,
    "learning_rate": 0.0005,
    "optimizer": "adam",
    "entropy_weight": 0.03,
    "entropy_gamma": 0.7
  },
  "gp_meld": {
    "run_gp_meld": false,
    "verbose": false,
    "generations": 20,
    "p_crossover": 0.5,
    "p_mutate": 0.5,
    "tournament_size": 5,
    "train_n": 50,
    "mutate_tree_max": 3,
    "parallel_eval": false
  },
  "prior": {
    "count_constraints": false,
    "relational": {
      "targets": [],
      "effectors": [],
      "relationship": null,
      "on": false
    },
    "length": {
      "min_": 4,
      "max_": 64,
      "on": true
    },
    "repeat": {
      "tokens": "const",
      "min_": null,
      "max_": 3,
      "on": true
    },
    "inverse": {
      "on": true
    },
    "trig": {
      "on": true
    },
    "const": {
      "on": true
    },
    "no_inputs": {
      "on": true
    },
    "uniform_arity": {
      "on": true
    },
    "soft_length": {
      "loc": 10,
      "scale": 5,
      "on": true
    },
    "domain_range": {
      "on": false
    },
    "language_model": {
      "weight": null,
      "on": false
    },
    "multi_discrete": {
      "dense": false,
      "ordered": false,
      "on": false
    }
  },
  "postprocess": {
    "show_count": 5,
    "save_plots": true
  },
  "checkpoint": {}
}
Would love to hear some input...
The reward over all values should then go to the generator, so that the target is to increase the reward across the whole dataset... With my current results, the reward is only good for 1 of the samples and far, far off for all the others...
I'm not sure I understand your request; in particular, what you mean by "reward over all values" and "1 of the samples". The reward value displayed to the user each iteration is just the reward of the single best object found so far. But that doesn't really have anything to do with training; the training algorithms are a function of all objects in a batch and their corresponding rewards.
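For reference, with your config ("metric": "inv_nrmse" with "metric_params": [1.0]) that displayed value is, if I recall the regression task correctly, the inverse of the normalized RMSE over the whole training set, roughly:

import numpy as np

def inv_nrmse(y, y_hat, weight=1.0):
    # RMSE normalized by the spread of the targets, then inverted so 1.0 is a perfect fit
    nrmse = np.sqrt(np.mean((y - y_hat) ** 2) / np.var(y))
    return 1.0 / (1.0 + weight * nrmse)

So a best R of 0.9744 is already an aggregate over all 20 of your points, not a per-sample value.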
I also don't really understand the reward function you provided. It would help if you provided a full working example instead of pseudocode. The reward function is a function of all points (X, y pairs) in your training data: X should be 2D and y should be 1D.
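For example, a metric respecting those shapes could look something like this; it is only a sketch with dummy data, and if you want to register a custom metric, the built-in ones live (if I remember the layout correctly) in dso/task/regression/regression.py:

import numpy as np

def my_metric(y, y_hat):
    # y and y_hat are 1D arrays covering *all* training points at once
    return 1.0 / (1.0 + np.mean(np.abs(y - y_hat)))   # bounded in (0, 1]

# Dummy data shaped like your problem: 20 samples, 1 input feature.
X = np.linspace(0.0, 1.0, 20).reshape(-1, 1)   # X is 2D: (20, 1)
y = np.sin(X[:, 0])                            # y is 1D: (20,)
y_hat = X[:, 0]                                # stand-in prediction from some candidate expression
print(my_metric(y, y_hat))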