UnboundLocalError: local variable 'self_reward_model' referenced before assignment
I just copied the demo code to test the package, but got `UnboundLocalError: local variable 'self_reward_model' referenced before assignment`.
code:
import torch
from torch import Tensor
from self_rewarding_lm_pytorch import (
SelfRewardingTrainer,
create_mock_dataset
)
from x_transformers import TransformerWrapper, Decoder
# Minimal 1-layer decoder-only transformer used as the base language model.
# (256 tokens covers the byte range used by the mock dataset below.)
transformer = TransformerWrapper(
num_tokens = 256,
max_seq_len = 1024,
attn_layers = Decoder(
dim = 512,
depth = 1,
heads = 8
)
)
# 100 mock SFT examples: (random byte-range token sequence, label tensor).
sft_dataset = create_mock_dataset(100, lambda: (torch.randint(0, 256, (256,)), torch.tensor(1)))
# 100 identical placeholder prompts for the self-rewarding DPO stage.
prompt_dataset = create_mock_dataset(100, lambda: 'mock prompt')
def decode_tokens(tokens: Tensor) -> str:
    """Decode a 1-D tensor of token ids into a string.

    Ids below 32 (ASCII control characters) are clamped to 32 (space) so
    every decoded character is printable.
    """
    # Generator expression feeds ''.join directly; the original wrapped an
    # intermediate lambda + map in a redundant list().
    return ''.join(chr(max(32, int(token))) for token in tokens)
def encode_str(seq_str: str) -> Tensor:
    """Encode a string as a tensor of its characters' ordinal values.

    NOTE: torch.Tensor(list) yields the default float dtype, not integer
    token ids — preserved here exactly as the original behaves.
    """
    ordinals = [ord(character) for character in seq_str]
    return Tensor(ordinals)
# Wire the model, mock datasets, and tokenizer hooks into the trainer.
trainer = SelfRewardingTrainer(
transformer,
finetune_configs = dict(
train_sft_dataset = sft_dataset,
self_reward_prompt_dataset = prompt_dataset,
dpo_num_train_steps = 1000
),
tokenizer_decode = decode_tokens,
tokenizer_encode = encode_str,
accelerate_kwargs = dict(
cpu = True  # force CPU execution via HF Accelerate
)
)
# Run the full pipeline (SFT then self-rewarding DPO), overwriting any
# existing checkpoints on disk.
trainer(overwrite_checkpoints = True)
Traceback stack
Traceback (most recent call last):
File "/raid/self_rewarding/train.py", line 46, in <module>
trainer(overwrite_checkpoints = True)
File "/home/miniconda3/envs/torch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/miniconda3/envs/torch/lib/python3.10/site-packages/self_rewarding_lm_pytorch/self_rewarding_lm_pytorch.py", line 950, in forward
trainer()
File "/home/miniconda3/envs/torch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/miniconda3/envs/torch/lib/python3.10/site-packages/self_rewarding_lm_pytorch/dpo.py", line 442, in forward
train_self_reward_dataset = self.dataset_generator()
File "/home/miniconda3/envs/torch/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/home/miniconda3/envs/torch/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/miniconda3/envs/torch/lib/python3.10/site-packages/self_rewarding_lm_pytorch/self_rewarding_lm_pytorch.py", line 577, in forward
rewards: List[Optional[float]] = [self.generate_reward(prompt, response) for response in candidate_responses]
File "/home/miniconda3/envs/torch/lib/python3.10/site-packages/self_rewarding_lm_pytorch/self_rewarding_lm_pytorch.py", line 577, in <listcomp>
rewards: List[Optional[float]] = [self.generate_reward(prompt, response) for response in candidate_responses]
File "/home/miniconda3/envs/torch/lib/python3.10/site-packages/self_rewarding_lm_pytorch/self_rewarding_lm_pytorch.py", line 509, in generate_reward
self_reward_model = self_reward_model.to(device)
UnboundLocalError: local variable 'self_reward_model' referenced before assignment
How could this happen? Could somebody help me? Thanks!
In the `generate_reward` function it is true that `self_reward_model` is referenced before assignment, but changing `self_reward_model = self_reward_model.to(device)` to `self_reward_model = self.self_reward_model.to(device)` causes other errors.
def generate_reward(
self,
prompt: str,
response: str
) -> Optional[float]:
"""
main contribution of the paper is the logic in this function
in paper, they sample it 3 times and then average
"""
device = next(self.model.parameters()).device
template_fn = self.reward_config.template_fn
parse_reward = self.reward_config.parse_reward
reward_prompt_str = template_fn(prompt = prompt, response = response)
reward_prompt = self.tokenizer_encode(reward_prompt_str).to(device)
reward_prompt = repeat(reward_prompt, 'n -> b n', b = self.num_evals_to_average)
reward_prompt = reward_prompt.to(device)
**self_reward_model = self_reward_model.to(device)**
reward_responses = sample(
self_reward_model,
prompts = reward_prompt,
seq_len = self.generate_reward_max_seq_len,
temperature = self.eval_temperature,
filter_fn = self.eval_filter_fn,
filter_kwargs = self.eval_filter_kwargs
)
have you solved it?
have you solved it?
Sorry, I haven't. When I change the code to `self_reward_model = self.self_reward_model.to(device)`, the program gets stuck in an endless loop...