I got this error from the OpenAI API (both from the Python openai library and with a direct request to the completions HTTP URL) when calling gpt-4o (current and new versions) and gpt-4o-mini (see the prompt below):
“Failed to generate output due to special tokens in the input.” (the error code was 400)
This error occurs only if I set max_tokens higher than 357 - otherwise the response is 200 with a valid generation. Therefore, the error message seems to be incorrect - the issue is not the input, but the tokens generated in the response, which appear to cause an error in the subsequent generation. However, when looking at the last tokens generated, I see that they also appear in the original prompt, so the source of the error is unclear.
I used temperature = 0. The prompt was:
system: “You are ChatGPT, a helpful assistant.”
user: “”"Code in the Open File:
async def infer(
self,
msg,
system=None,
context=None,
max_new_tokens=None,
model_name="",
full_msg=None,
):
if max_new_tokens is None:
max_new_tokens = self.max_new_tokens
msg_to_send = msg
if context:
if isinstance(context, list):
msg_to_send = f\"\"\"this is my code:
{new_line.join([get_code_block(c) for c in context])}
my question is: {msg}\"\"\"
elif isinstance(context, dict):
msg_to_send = f\"\"\"this is my code:
{get_code_block({'path': context['fileUri'], 'function': context['context']})}
my question is: {msg}\"\"\"
else:
msg_to_send = fֿ\"\"\"this is my code:
{context}
my question is: {msg}\"\"\"
if len(msg_to_send) > 20000:
msg_to_send = msg_to_send[-2000:]
messages = [{"role": "user", "content": msg_to_send}]
# msg_to_send = self.tok.apply_chat_template(messages, tokenize=False)
# msg_to_send = f\"\"\"<s>[INST] {msg_to_send} [/INST]
# \"\"\"
'''msg_to_send = f\"\"\"You are an exceptionally intelligent coding assistant that consistently delivers accurate and reliable responses to user instructions.
@@ Instruction
{msg_to_send}
@@ Response
\"\"\"'''
msg_to_send = f\"\"\"<|im_start|>system
You are Dolphin, a helpful AI coding assistant.<|im_end|>
<|im_start|>user
{msg_to_send}<|im_end|>
<|im_start|>assistant
\"\"\"
\"\"\"msg_to_send = f'''### Instruction:
{msg_to_send}
### Response:
'''\"\"\"
# msg_to_send = f'''GPT4 Correct User: {msg_to_send}<|end_of_turn|>GPT4 Correct Assistant:'''
if full_msg:
msg_to_send = full_msg
data = {
"inputs": msg_to_send,
"parameters": {
"max_new_tokens": max_new_tokens,
"temperature": self.temperature,
"details": True,
"top_p": self.top_p,
"seed": self.seed,
"stop": self.stop,
},
}
tries = LLM_INFERENCE_ATTEMPTS
sleep_time = 1
while tries >= 0:
try:
async with aiohttp.ClientSession() as session:
async with session.post(url=chat_url, json=data) as resp:
if resp.status == 200:
r = await resp.json()
else:
self.logger.error(f"Request to dolphin failed with status {resp.status}", analytics_properties={
"model": "dolphin",
"attempt": LLM_INFERENCE_ATTEMPTS - tries + 1,
})
await asyncio.sleep(sleep_time)
tries -= 1
sleep_time *= 2
continue
return r["generated_text"]
except Exception as e:
self.logger.error(f"Error occurred while generating response: {str(e)}", analytics_properties={
"model": "dolphin",
"attempt": LLM_INFERENCE_ATTEMPTS - tries + 1,
})
await asyncio.sleep(sleep_time)
sleep_time *= 2
tries -= 1
print("Generation error")
raise Exception("Generation error")
Diagnostics in the selected code:
selected code: except Exception as e:
Error message: Ensure that timeouts and cancellation are properly handled using asyncio.TimeoutError
and asyncio.CancelledError
.
Please provide the fix for the specific errors and warnings I am encountering. Assume the warnings and errors are correct and must be fixed. output the full fixed code, where the diff is highlighted such that removed lines begin with a ‘-’ and new lines begin with a ‘+’ (like a diff mode in pr).“”"