FATE icon indicating copy to clipboard operation
FATE copied to clipboard

2000万rsa求交,遇到错误

Open ykcirh opened this issue 3 years ago • 0 comments

Describe the bug: RSA 2000万求交,2048bit(1024bit正常),rollsite遇到错误,看日志应该是pull状态没有更新,具体如下:

[ERROR] [2022-06-08 15:57:05,239] [202206081003093650570] [13721:140648555665216] - [task_executor._run_] [line:243]: roll site pull waiting failed because there is no updated progress: rs_key=__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002, rs_header=<ErRollSiteHeader(roll_site_session_id='202206081003093650570_intersection_0_0', name='hash.42634ddb254c23eb01e6.host_prvkey_ids', tag='fit', src_role='host', src_party_id='60001', dst_role='guest', dst_party_id='60002', data_type='', options=[{}], total_partitions=-1, partition_id=-1, total_streams=-1, total_batches=-1, stream_seq=-1, batch_seq=-1, stage=) at 0x7feb2046e128>, pull_status={0: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-0', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 1: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-1', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 
12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 2: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-2', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 3: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-3', is_finished=False, total_batches=-1, 
batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 4: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-4', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 5: 
BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-5', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 6: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-6', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 
20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 7: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-7', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair')}, last_cur_pairs=4820800, cur_pairs=4820800
Traceback (most recent call last):
  File "/data/projects/fate/fateflow/python/fate_flow/worker/task_executor.py", line 195, in _run_
    cpn_output = run_object.run(cpn_input)
  File "/data/projects/fate/fate/python/federatedml/model_base.py", line 236, in run
    self._run(cpn_input=cpn_input)
  File "/data/projects/fate/fate/python/federatedml/model_base.py", line 313, in _run
    this_data_output = func(*real_param)
  File "/data/projects/fate/fate/python/federatedml/statistic/intersect/intersect_model.py", line 231, in fit
    self.intersect_ids = self.intersection_obj.run_intersect(intersect_data)
  File "/data/projects/fate/fate/python/federatedml/statistic/intersect/rsa_intersect/rsa_intersect_base.py", line 199, in run_intersect
    intersect_ids = self.unified_calculation_process(data_instances)
  File "/data/projects/fate/fate/python/federatedml/statistic/intersect/rsa_intersect/rsa_intersect_guest.py", line 214, in unified_calculation_process
    host_prvkey_ids_list = self.get_host_prvkey_ids()
  File "/data/projects/fate/fate/python/federatedml/statistic/intersect/rsa_intersect/rsa_intersect_guest.py", line 29, in get_host_prvkey_ids
    host_prvkey_ids_list = self.transfer_variable.host_prvkey_ids.get(idx=-1)
  File "/data/projects/fate/fate/python/fate_arch/federation/transfer_variable.py", line 307, in get
    rtn = self.get_parties(parties=src_parties, suffix=suffix)
  File "/data/projects/fate/fate/python/fate_arch/federation/transfer_variable.py", line 241, in get_parties
    name=name, tag=tag, parties=parties, gc=self._get_gc
  File "/data/projects/fate/fate/python/fate_arch/federation/eggroll/_federation.py", line 56, in get
    raw_result = _get(name, tag, parties, self._rsc, gc)
  File "/data/projects/fate/fate/python/fate_arch/federation/eggroll/_federation.py", line 108, in _get
    v = future.result()
  File "/opt/rh/rh-python36/root/usr/lib64/python3.6/concurrent/futures/_base.py", line 425, in result
    return self.__get_result()
  File "/opt/rh/rh-python36/root/usr/lib64/python3.6/concurrent/futures/_base.py", line 384, in __get_result
    raise self._exception
  File "/data/projects/fate/eggroll/python/eggroll/core/datastructure/threadpool.py", line 51, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 647, in _pull_one
    raise e
  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 620, in _pull_one
    raise IOError(f"roll site pull waiting failed because there is no updated progress: rs_key={rs_key}, "
OSError: roll site pull waiting failed because there is no updated progress: rs_key=__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002, rs_header=<ErRollSiteHeader(roll_site_session_id='202206081003093650570_intersection_0_0', name='hash.42634ddb254c23eb01e6.host_prvkey_ids', tag='fit', src_role='host', src_party_id='60001', dst_role='guest', dst_party_id='60002', data_type='', options=[{}], total_partitions=-1, partition_id=-1, total_streams=-1, total_batches=-1, stream_seq=-1, batch_seq=-1, stage=) at 0x7feb2046e128>, pull_status={0: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-0', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 1: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-1', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 
12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 2: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-2', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 3: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-3', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 
12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 4: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-4', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 5: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-5', is_finished=False, total_batches=-1, 
batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 6: BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-6', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair'), 7: 
BSS(tag='putBatch-__rsk#202206081003093650570_intersection_0_0#hash.42634ddb254c23eb01e6.host_prvkey_ids#fit#host#60001#guest#60002-7', is_finished=False, total_batches=-1, batch_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 12052, 2: 12052, 3: 12052, 4: 12052, 5: 12052, 6: 12052, 7: 12052, 8: 12052, 9: 12052, 10: 12052, 11: 12052, 12: 12052, 13: 12052, 14: 12052, 15: 12052, 16: 12052, 17: 12052, 18: 12052, 19: 12052, 20: 12052, 21: 12052, 22: 12052, 23: 12052, 24: 12052, 25: 12052, 26: 12052, 27: 12052, 28: 12052, 29: 12052, 30: 12052, 31: 12052, 32: 12052, 33: 12052, 34: 12052, 35: 12052, 36: 12052, 37: 12052, 38: 12052, 39: 12052, 40: 12052, 41: 12052, 42: 12052, 43: 12052, 44: 12052, 45: 12052, 46: 12052, 47: 12052, 48: 12052, 49: 12052, 50: 12052}), total_streams=-1, stream_seq_to_pair_counter=defaultdict(<class 'int'>, {1: 120520, 2: 120520, 3: 120520, 4: 120520, 5: 120520}), stream_seq_to_batch_seq=defaultdict(<class 'int'>, {1: 10, 2: 20, 3: 30, 4: 40, 5: 50}), total_pairs=602600, data_type='rollpair')}, last_cur_pairs=4820800, cur_pairs=4820800

修改eggroll的pull interval为12分钟后还是报错,报错信息如下:

[ERROR] [2022-06-09 18:12:44,832] [202206091447237247500] [65120:139654636889856] - [_federation._remote_exception_re_raise] [line:148]: [federation.eggroll.remote.hash.f8e28fbc38ac27fc73e5.guest_pubkey_ids.fit]future to remote to party: ('host', '60001') fail, terminating process(pid=65120)
Traceback (most recent call last):
  File "/data/projects/fate/eggroll/python/eggroll/core/client.py", line 84, in sync_send
    response = _command_stub.call(request.to_proto())
  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 604, in __call__
    return _end_unary_response_blocking(state, call, False, None)
  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 506, in _end_unary_response_blocking
    raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:
        status = StatusCode.UNKNOWN
        details = "Exception calling application:

==== detail start, at 20220609.181244.830 ====
Traceback (most recent call last):
  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper
    return func(*args, **kw)
  File "/data/projects/fate/eggroll/python/eggroll/roll_pair/egg_pair.py", line 658, in run_task
    value=self.functor_serdes.serialize(f(task)))
  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 512, in _push_partition
    raise exception
  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 493, in _push_partition
    stub.push(bs_helper.generate_packet(batch_stream_data, cur_retry), timeout=per_stream_timeout)
  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 737, in __call__
    return _end_unary_response_blocking(state, call, False, None)
  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 506, in _end_unary_response_blocking
    raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:
        status = StatusCode.DEADLINE_EXCEEDED
        details = "Deadline Exceeded"
        debug_error_string = "{"created":"@1654769260.246041248","description":"Error received from peer ipv4:10.0.50.57:9370","file":"src/core/lib/surface/call.cc","file_line":1055,"grpc_message":"Deadline Exceeded","grpc_status":4}"
>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper
    return func(*args, **kw)
  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_service.py", line 33, in call
    kwargs=getattr(command_request, '_kwargs'))
  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_router.py", line 94, in dispatch
    raise e
  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_router.py", line 91, in dispatch
    call_result = _method(_instance, *deserialized_args)
  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 194, in wrapper
    raise RuntimeError(msg)
RuntimeError:

==== detail start, at 20220609.181244.829 ====
Traceback (most recent call last):
  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper
    return func(*args, **kw)
  File "/data/projects/fate/eggroll/python/eggroll/roll_pair/egg_pair.py", line 658, in run_task
    value=self.functor_serdes.serialize(f(task)))
  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 512, in _push_partition
    raise exception
  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 493, in _push_partition
    stub.push(bs_helper.generate_packet(batch_stream_data, cur_retry), timeout=per_stream_timeout)
  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 737, in __call__
    return _end_unary_response_blocking(state, call, False, None)
  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 506, in _end_unary_response_blocking
    raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:
        status = StatusCode.DEADLINE_EXCEEDED
        details = "Deadline Exceeded"
        debug_error_string = "{"created":"@1654769260.246041248","description":"Error received from peer ipv4:10.0.50.57:9370","file":"src/core/lib/surface/call.cc","file_line":1055,"grpc_message":"Deadline Exceeded","grpc_status":4}"
>

==== detail end ====



==== detail end ====


"
        debug_error_string = "{"created":"@1654769564.831813980","description":"Error received from peer ipv4:192.167.0.2:43816","file":"src/core/lib/surface/call.cc","file_line":1055,"grpc_message":"Exception calling application: \n\n==== detail start, at 20220609.181244.830 ====\nTraceback (most recent call last):\n  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper\n    return func(*args, **kw)\n  File "/data/projects/fate/eggroll/python/eggroll/roll_pair/egg_pair.py", line 658, in run_task\n    value=self.functor_serdes.serialize(f(task)))\n  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 512, in _push_partition\n    raise exception\n  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 493, in _push_partition\n    stub.push(bs_helper.generate_packet(batch_stream_data, cur_retry), timeout=per_stream_timeout)\n  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 737, in __call__\n    return _end_unary_response_blocking(state, call, False, None)\n  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 506, in _end_unary_response_blocking\n    raise _Rendezvous(state, None, None, deadline)\ngrpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:\n\tstatus = StatusCode.DEADLINE_EXCEEDED\n\tdetails = "Deadline Exceeded"\n\tdebug_error_string = "{"created":"@1654769260.246041248","description":"Error received from peer ipv4:10.0.50.57:9370","file":"src/core/lib/surface/call.cc","file_line":1055,"grpc_message":"Deadline Exceeded","grpc_status":4}"\n>\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper\n    return func(*args, **kw)\n  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_service.py", line 33, in call\n    kwargs=getattr(command_request, 
'_kwargs'))\n  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_router.py", line 94, in dispatch\n    raise e\n  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_router.py", line 91, in dispatch\n    call_result = _method(_instance, *deserialized_args)\n  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 194, in wrapper\n    raise RuntimeError(msg)\nRuntimeError: \n\n==== detail start, at 20220609.181244.829 ====\nTraceback (most recent call last):\n  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper\n    return func(*args, **kw)\n  File "/data/projects/fate/eggroll/python/eggroll/roll_pair/egg_pair.py", line 658, in run_task\n    value=self.functor_serdes.serialize(f(task)))\n  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 512, in _push_partition\n    raise exception\n  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 493, in _push_partition\n    stub.push(bs_helper.generate_packet(batch_stream_data, cur_retry), timeout=per_stream_timeout)\n  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 737, in __call__\n    return _end_unary_response_blocking(state, call, False, None)\n  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 506, in _end_unary_response_blocking\n    raise _Rendezvous(state, None, None, deadline)\ngrpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:\n\tstatus = StatusCode.DEADLINE_EXCEEDED\n\tdetails = "Deadline Exceeded"\n\tdebug_error_string = "{"created":"@1654769260.246041248","description":"Error received from peer ipv4:10.0.50.57:9370","file":"src/core/lib/surface/call.cc","file_line":1055,"grpc_message":"Deadline Exceeded","grpc_status":4}"\n>\n\n==== detail end ====\n\n\n\n==== detail end ====\n\n","grpc_status":2}"
>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/data/projects/fate/fate/python/fate_arch/federation/eggroll/_federation.py", line 141, in _remote_exception_re_raise
    f.result()
  File "/opt/rh/rh-python36/root/usr/lib64/python3.6/concurrent/futures/_base.py", line 425, in result
    return self.__get_result()
  File "/opt/rh/rh-python36/root/usr/lib64/python3.6/concurrent/futures/_base.py", line 384, in __get_result
    raise self._exception
  File "/data/projects/fate/eggroll/python/eggroll/core/datastructure/threadpool.py", line 51, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 521, in _push_rollpair
    rp.with_stores(_push_partition, options={"__op": "push_partition"})
  File "/data/projects/fate/eggroll/python/eggroll/core/aspects.py", line 30, in wrapper
    result = func(*args, **kwargs)
  File "/data/projects/fate/eggroll/python/eggroll/roll_pair/roll_pair.py", line 1141, in with_stores
    ret_pair = future.result()[0]
  File "/opt/rh/rh-python36/root/usr/lib64/python3.6/concurrent/futures/_base.py", line 432, in result
    return self.__get_result()
  File "/opt/rh/rh-python36/root/usr/lib64/python3.6/concurrent/futures/_base.py", line 384, in __get_result
    raise self._exception
  File "/data/projects/fate/eggroll/python/eggroll/core/datastructure/threadpool.py", line 51, in run
    result = self.fn(*self.args, **self.kwargs)
  File "/data/projects/fate/eggroll/python/eggroll/core/client.py", line 97, in sync_send
    raise CommandCallError(command_uri, endpoint, e)
eggroll.core.client.CommandCallError: ('Failed to call command: CommandURI(_uri=v1/egg-pair/runTask) to endpoint: nodemanager:43816, caused by: ', <_Rendezvous of RPC that terminated with:
        status = StatusCode.UNKNOWN
        details = "Exception calling application:

==== detail start, at 20220609.181244.830 ====
Traceback (most recent call last):
  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper
    return func(*args, **kw)
  File "/data/projects/fate/eggroll/python/eggroll/roll_pair/egg_pair.py", line 658, in run_task
    value=self.functor_serdes.serialize(f(task)))
  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 512, in _push_partition
    raise exception
  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 493, in _push_partition
    stub.push(bs_helper.generate_packet(batch_stream_data, cur_retry), timeout=per_stream_timeout)
  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 737, in __call__
    return _end_unary_response_blocking(state, call, False, None)
  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 506, in _end_unary_response_blocking
    raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:
        status = StatusCode.DEADLINE_EXCEEDED
        details = "Deadline Exceeded"
        debug_error_string = "{"created":"@1654769260.246041248","description":"Error received from peer ipv4:10.0.50.57:9370","file":"src/core/lib/surface/call.cc","file_line":1055,"grpc_message":"Deadline Exceeded","grpc_status":4}"
>

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper
    return func(*args, **kw)
  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_service.py", line 33, in call
    kwargs=getattr(command_request, '_kwargs'))
  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_router.py", line 94, in dispatch
    raise e
  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_router.py", line 91, in dispatch
    raise e
  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_router.py", line 91, in dispatch
    call_result = _method(_instance, *deserialized_args)
  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 194, in wrapper
    raise RuntimeError(msg)
RuntimeError:

==== detail start, at 20220609.181244.829 ====
Traceback (most recent call last):
  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper
    return func(*args, **kw)
  File "/data/projects/fate/eggroll/python/eggroll/roll_pair/egg_pair.py", line 658, in run_task
    value=self.functor_serdes.serialize(f(task)))
  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 512, in _push_partition
    raise exception
  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 493, in _push_partition
    stub.push(bs_helper.generate_packet(batch_stream_data, cur_retry), timeout=per_stream_timeout)
  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 737, in __call__
    return _end_unary_response_blocking(state, call, False, None)
  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 506, in _end_unary_response_blocking
    raise _Rendezvous(state, None, None, deadline)
grpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:
        status = StatusCode.DEADLINE_EXCEEDED
        details = "Deadline Exceeded"
        debug_error_string = "{"created":"@1654769260.246041248","description":"Error received from peer ipv4:10.0.50.57:9370","file":"src/core/lib/surface/call.cc","file_line":1055,"grpc_message":"Deadline Exceeded","grpc_status":4}"
>

==== detail end ====

==== detail end ====

"
        debug_error_string = "{"created":"@1654769564.831813980","description":"Error received from peer ipv4:192.167.0.2:43816","file":"src/core/lib/surface/call.cc","file_line":1055,"grpc_message":"Exception calling application: \n\n==== detail start, at 20220609.181244.830 ====\nTraceback (most recent call last):\n  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper\n    return func(*args, **kw)\n  File "/data/projects/fate/eggroll/python/eggroll/roll_pair/egg_pair.py", line 658, in run_task\n    value=self.functor_serdes.serialize(f(task)))\n  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 512, in _push_partition\n    raise exception\n  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 493, in _push_partition\n    stub.push(bs_helper.generate_packet(batch_stream_data, cur_retry), timeout=per_stream_timeout)\n  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 737, in __call__\n    return _end_unary_response_blocking(state, call, False, None)\n  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 506, in _end_unary_response_blocking\n    raise _Rendezvous(state, None, None, deadline)\ngrpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:\n\tstatus = StatusCode.DEADLINE_EXCEEDED\n\tdetails = "Deadline Exceeded"\n\tdebug_error_string = "{"created":"@1654769260.246041248","description":"Error received from peer ipv4:10.0.50.57:9370","file":"src/core/lib/surface/call.cc","file_line":1055,"grpc_message":"Deadline Exceeded","grpc_status":4}"\n>\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper\n    return func(*args, **kw)\n  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_service.py", line 33, in call\n    kwargs=getattr(command_request, 
'_kwargs'))\n  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_router.py", line 94, in dispatch\n    raise e\n  File "/data/projects/fate/eggroll/python/eggroll/core/command/command_router.py", line 91, in dispatch\n    call_result = _method(_instance, *deserialized_args)\n  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 194, in wrapper\n    raise RuntimeError(msg)\nRuntimeError: \n\n==== detail start, at 20220609.181244.829 ====\nTraceback (most recent call last):\n  File "/data/projects/fate/eggroll/python/eggroll/core/utils.py", line 187, in wrapper\n    return func(*args, **kw)\n  File "/data/projects/fate/eggroll/python/eggroll/roll_pair/egg_pair.py", line 658, in run_task\n    value=self.functor_serdes.serialize(f(task)))\n  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 512, in _push_partition\n    raise exception\n  File "/data/projects/fate/eggroll/python/eggroll/roll_site/roll_site.py", line 493, in _push_partition\n    stub.push(bs_helper.generate_packet(batch_stream_data, cur_retry), timeout=per_stream_timeout)\n  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 737, in __call__\n    return _end_unary_response_blocking(state, call, False, None)\n  File "/opt/app-root/lib/python3.6/site-packages/grpc/_channel.py", line 506, in _end_unary_response_blocking\n    raise _Rendezvous(state, None, None, deadline)\ngrpc._channel._Rendezvous: <_Rendezvous of RPC that terminated with:\n\tstatus = StatusCode.DEADLINE_EXCEEDED\n\tdetails = "Deadline Exceeded"\n\tdebug_error_string = "{"created":"@1654769260.246041248","description":"Error received from peer ipv4:10.0.50.57:9370","file":"src/core/lib/surface/call.cc","file_line":1055,"grpc_message":"Deadline Exceeded","grpc_status":4}"\n>\n\n==== detail end ====\n\n\n\n==== detail end ====\n\n","grpc_status":2}"
>)

请问有什么解决方法?

ykcirh avatar Jun 10 '22 03:06 ykcirh