-
Notifications
You must be signed in to change notification settings - Fork 16
Open
Description
When I run:
RAYON_NUM_THREADS=6 CUDA_VISIBLE_DEVICES=0,1,2,3 python3 gen_model_answer_rest.py --model-path /models/LLAMA-2-series/llama-2-70b-chat --model-id llama-2-70b-chat --datastore-path ../datastore/datastore_chat_small.idx
I get:
(note: I removed the surrounding try/except so the full traceback would be shown)
Traceback (most recent call last):
File "/rest/llm_judge/gen_model_answer_rest.py", line 497, in <module>
run_eval(
File "/rest/llm_judge/gen_model_answer_rest.py", line 154, in run_eval
get_answers_func(
File "/usr/local/lib/python3.9/dist-packages/torch/utils/_contextlib.py", line 116, in decorate_context
return func(*args, **kwargs)
File "/rest/llm_judge/gen_model_answer_rest.py", line 231, in get_model_answers
output_ids, new_token, idx, _, start_time = rest_forward(
File "/rest/llm_judge/gen_model_answer_rest.py", line 57, in rest_forward
logits = initialize_logits(
File "/rest/llm_judge/../rest/model/utils.py", line 49, in initialize_logits
outputs, logits = model(
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/rest/llm_judge/../rest/model/rest_model.py", line 92, in forward
outputs = self.base_model.model(
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/rest/llm_judge/../rest/model/modeling_llama_kv.py", line 1044, in forward
layer_outputs = decoder_layer(
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
File "/rest/llm_judge/../rest/model/modeling_llama_kv.py", line 720, in forward
hidden_states, self_attn_weights, present_key_value = self.self_attn(
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1553, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/torch/nn/modules/module.py", line 1562, in _call_impl
return forward_call(*args, **kwargs)
File "/usr/local/lib/python3.9/dist-packages/accelerate/hooks.py", line 170, in new_forward
output = module._old_forward(*args, **kwargs)
File "/rest/llm_judge/../rest/model/modeling_llama_kv.py", line 594, in forward
key_states = past_key_value[0].cat(key_states, dim=2)
File "/rest/llm_judge/../rest/model/kv_cache.py", line 66, in cat
dst.copy_(tensor)
RuntimeError: The size of tensor a (64) must match the size of tensor b (8) at non-singleton dimension 1
Metadata
Metadata
Assignees
Labels
No labels