Hi, I am using the streaming OpenAI API, but when the answer is long, the response breaks.
Here is my code
def call_openai_streaming(
    client, gpt_model_name, messages, temperature, response_format, final_response
):
    """Stream a chat completion from OpenAI, yielding each non-empty text delta.

    Parameters mirror ``client.chat.completions.create``; ``final_response``
    is an accumulator string supplied by the caller.

    NOTE(review): strings are immutable, so ``final_response += delta`` rebinds
    a *local* name only — the accumulated text never reaches the caller. The
    accumulation is kept for interface compatibility, but callers must not
    rely on it; collect the yielded chunks instead.
    """
    openai_stream = client.chat.completions.create(
        model=gpt_model_name,
        messages=messages,
        stream=True,
        temperature=temperature,
        response_format=response_format,
    )
    try:
        for chunk in openai_stream:
            delta = chunk.choices[0].delta.content
            if delta is not None:
                final_response += delta
                yield delta
    finally:
        # When the HTTP client disconnects mid-stream, StreamingResponse
        # closes this generator and GeneratorExit is raised at the yield —
        # the symptom reported for long answers. Ensure the underlying
        # OpenAI stream is closed so its connection is released.
        close = getattr(openai_stream, "close", None)
        if callable(close):
            close()
def qa_conversation(config, conversation_log, request):
    """Build the QA prompt and return a LangSmith-traced generator of answer chunks.

    Returns the generator produced by ``llm_call`` — iterating it drives the
    OpenAI stream.
    """
    messages = [
        # Bug fix: `....` is a syntax error; Ellipsis is `...` (prompt
        # content elided in the original post).
        {"role": "system", "content": ...},
        {"role": "user", "content": ...},
    ]

    # Plain string, not an f-string: the name has no placeholders.
    @traceable(run_type="chain", name="qa_conversation")
    def llm_call(messages):
        # NOTE(review): `final_response` is never read after the call — the
        # `+=` inside call_openai_streaming rebinds a local there, so this
        # accumulator can never be filled. Kept only to satisfy the signature.
        final_response = ""
        client = OpenAI(api_key=config["OPENAI_KEY"])
        yield from call_openai_streaming(
            client, "gpt-4", messages, 0.0, None, final_response
        )

    return llm_call(messages)
And here is my FastAPI endpoint:
@app.post("/generate_assistant_response")
def assistant_response(request: Request):
    """Stream the assistant's answer back to the client as chunked plain text."""
    headers = {
        "Content-Type": "text/plain",
        # NOTE(review): Starlette/uvicorn apply chunked transfer encoding to
        # streaming responses automatically; forcing Transfer-Encoding (and a
        # non-standard Connection value) by hand can confuse proxies and is a
        # plausible cause of the mid-stream disconnects on long answers —
        # confirm whether these two headers are actually required.
        "Transfer-Encoding": "chunked",
        "Connection": "Transfer-Encoding",
    }
    conversation_log = request.conversation_log
    intent_type = request.intent.get("type", None)
    if intent_type == "qa":
        generator = qa_conversation(config, conversation_log, request)
        return StreamingResponse(generator, media_type="text/plain", headers=headers)
    # Bug fix: the original fell through and implicitly returned None, which
    # FastAPI cannot serialize into a response. Answer unknown intents with an
    # explicit 400 plain-text response instead.
    return StreamingResponse(
        iter([f"unsupported intent type: {intent_type}"]),
        media_type="text/plain",
        status_code=400,
    )
I am logging errors via LangSmith, and in that case LangSmith shows this error:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/langsmith/run_helpers.py", line 443, in generator_wrapper
yield item
GeneratorExit
I am open to any suggestions that could help me debug this issue in a better way. I only get the error when the answer is long; for short answers there is no issue.