FATE
FATE copied to clipboard
100W×500 特征数据，运行 SSHE 训练出错
任务配置文件:
{
"dsl_version": 2,
"initiator": {
"role": "guest",
"party_id": 9999
},
"role": {
"host": [
10000
],
"guest": [
9999
]
},
"component_parameters": {
"role": {
"guest": {
"0": {
"data_transform_0": {
"with_label": true
},
"reader_0": {
"table": {
"name": "test_guest_100w_500_hetero",
"namespace": "test"
}
}
}
},
"host": {
"0": {
"data_transform_0": {
"with_label": false
},
"reader_0": {
"table": {
"name": "test_host_100w_500_hetero",
"namespace": "test"
}
}
}
}
},
"job_parameters":{
"common":{
"task_cores":16,
"timeout":604800
}
},
"common": {
"data_transform_0": {
"output_format": "dense"
},
"hetero_sshe_lr_0": {
"penalty": "L2",
"tol": 0.0001,
"alpha": 0.01,
"optimizer": "rmsprop",
"batch_size": -1,
"learning_rate": 0.1,
"init_param": {
"init_method": "zeros",
"fit_intercept": true
},
"max_iter": 3,
"early_stop": "diff",
"encrypt_param": {
"key_length": 1024
},
"reveal_strategy": "respectively",
"reveal_every_iter": true
},
"evaluation_0": {
"eval_type": "binary"
}
}
}
}
报错信息:
[WARNING] [2022-08-16 18:17:22,362] [202208161736486735430] [10665:140295538583360] - [mini_batch.get_batch_generator] [line:92]: As batch_size >= data size, all batch strategy will be disabled
[ERROR] [2022-08-16 18:50:29,284] [202208161736486735430] [10665:140295538583360] - [task_executor._run_] [line:243]: ('Failed to call command: CommandURI(_uri=v1/egg-pair/runTask) to endpoint: nodemanager:43175, caused by: ', <_Rendezvous of RPC that terminated with:
status = StatusCode.UNKNOWN
details = "Exception calling application:
==== detail start, at 20220816.185029.205 ====
Traceback (most recent call last):
File "/data/projects/fate/eggroll/python/eggroll/core/pair_store/lmdb.py", line 244, in put
ret = self.txn.put(k, v)
lmdb.MapFullError: mdb_put: MDB_MAP_FULL: Environment mapsize limit reached
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/data/projects/fate/eggroll/python/eggroll/roll_pair/egg_pair.py", line 131, in _run_unary
func(rb, input_key_serdes, input_value_serdes, wb)
File "/data/projects/fate/eggroll/python/eggroll/roll_pair/egg_pair.py", line 263, in map_values_wrapper
output_writebatch.put(k_bytes, value_serdes.serialize(f(v)))
File "/data/projects/fate/eggroll/python/eggroll/core/pair_store/lmdb.py", line 248, in put
raise ValueError(f"put key={k}, value={v} raise Exception")
ValueError: put key=b'\x80\x03X \x00\x00\x005a4b25aaed25c2ee1b74de72dc03c14eq\x00.', value=b'\x80\x03cnumpy.core.multiarray\n_reconstruct\nq\x00cnumpy\nndarray\nq\x01K\x00\x85q\x02C\x01bq\x03\x87q\x04Rq\x05(K\x01M\xf5\x01\x85q\x06cnumpy\ndtype\nq\x07X\x02\x00\x00\x00O8q\x08K\x00K\x01\x87q\tRq\n(K\x03X\x01\x00\x00\x00|q\x0bNNNJ\xff\xff\xff\xffJ\xff\xff\xff\xffK?tq\x0cb\x89]q\r(cfederatedml.secureprotol.fixedpoint\nFixedPointNumber\nq\x0e)\x81q\x0f}q\x10(X\x01\x00\x00\x00nq\x11\x8a\x81\x8fo\t\x1e\xb5c_\xa2\xbb\x0f\xb2\xed|Lj?*\x96^\xceGS\x86\xc4\x83g\xd7\x053\x02\x01\x88\xad\xa3\xfb\x9dS\xbeJ\x8bL\xf9\x00V\xba\x93\x8bX\xcc\xe3RX\'zU\x86\xb4F\x19\xa76\xa1\x19\x1b\x19\xee\xca]\x8f\xc1W\x06\xf0\x8bE\xd1(\xc7/G\x12\xc2\xdb\x06\x06\x18\xe5"\x1530\x19\xd1#\n\xd5\xbc\xde\x1f<\xfc[\xf5\t} \x15\xcc=R\xde\xa9\\P\xe9w\xc2\xe6`\x8ah\x1a\r\xedZ\x02\xdf\x96\x00X\x07\x00\x00\x00max_intq\x12\x8a\x80\xc7\xb7\x04\x8f\xda\xb1/\xd1\xdd\x07\xd9v>&\xb5\x1f\x15K/\xe7\xa3)C\xe2\xc1\xb3\xeb\x82\x19\x81\x00\xc4\xd6\xd1\xfd\xce)_\xa5E\xa6|\x00+\xdd\xc9E,\xe6q)\xac\x13\xbd*CZ\xa3\x8cS\x9b\xd0\x8c\x8d\x0cw\xe5\xae\xc7\xe0+\x03\xf8\xc5\xa2h\x94\xe3\x97#\t\xe1m\x03\x03\x8cr\x91\x8a\x
Increase task_cores or the number of computing partitions to avoid this issue.
This issue has been marked as stale because it has been open for 365 days with no activity. If this issue is still relevant or if there is new information, please feel free to update or reopen it.