It appears that `text-embedding-3-small` has recently (at least as of March 16) stopped working for text containing special tokens; such requests now fail with a 500 Internal Server Error:
Time zone: PDT
"""Minimal repro: causes 500 on text-embedding-3-small but not other models."""
import asyncio
import openai
from dotenv import load_dotenv
# Special tokens spliced into the test sentence, one request per token/model pair.
TOKENS = [
    "<|endoftext|>",
    "<|im_start|>",
    "<|im_end|>",
    "<|fim_prefix|>",
    "<|fim_middle|>",
    "<|fim_suffix|>",
    "<|endofprompt|>",
]

# Embedding models that every token is tried against.
MODELS = [
    "text-embedding-3-small",
    "text-embedding-3-large",
    "text-embedding-ada-002",
]
async def main():
    """Request an embedding for every (token, model) pair and report the result.

    For each entry in ``TOKENS`` a short sentence with the token spliced into
    the middle is sent to each model in ``MODELS``. One line is printed per
    pair, ending in ``SUCCESS`` or ``500 ERROR``.

    Only ``openai.InternalServerError`` is handled; any other exception
    propagates to the caller, after the client has been closed.
    """
    client = openai.AsyncOpenAI()
    try:
        for token in TOKENS:
            text = f"Here is some text.{token}Here is some more text."
            for model in MODELS:
                try:
                    await client.embeddings.create(input=text, model=model)
                except openai.InternalServerError:
                    print(f"{token:25s} {model}: 500 ERROR")
                else:
                    print(f"{token:25s} {model}: SUCCESS")
    finally:
        # Close the client even when an unhandled error aborts the loops,
        # so it is not left open on the failure path.
        await client.close()
if __name__ == "__main__":
    # Load variables from a .env file before the client is constructed.
    # NOTE(review): presumably this supplies the API key -- confirm.
    load_dotenv()
    repro = main()
    asyncio.run(repro)
Output:
<|endoftext|> text-embedding-3-small: 500 ERROR
<|endoftext|> text-embedding-3-large: SUCCESS
<|endoftext|> text-embedding-ada-002: SUCCESS
<|im_start|> text-embedding-3-small: 500 ERROR
<|im_start|> text-embedding-3-large: SUCCESS
<|im_start|> text-embedding-ada-002: SUCCESS
<|im_end|> text-embedding-3-small: 500 ERROR
<|im_end|> text-embedding-3-large: SUCCESS
<|im_end|> text-embedding-ada-002: SUCCESS
<|fim_prefix|> text-embedding-3-small: 500 ERROR
<|fim_prefix|> text-embedding-3-large: SUCCESS
<|fim_prefix|> text-embedding-ada-002: SUCCESS
<|fim_middle|> text-embedding-3-small: 500 ERROR
<|fim_middle|> text-embedding-3-large: SUCCESS
<|fim_middle|> text-embedding-ada-002: SUCCESS
<|fim_suffix|> text-embedding-3-small: 500 ERROR
<|fim_suffix|> text-embedding-3-large: SUCCESS
<|fim_suffix|> text-embedding-ada-002: SUCCESS
<|endofprompt|> text-embedding-3-small: 500 ERROR
<|endofprompt|> text-embedding-3-large: SUCCESS
<|endofprompt|> text-embedding-ada-002: SUCCESS