How do I send a streamed response from backend to frontend?

Hey all,
I am using ChatGPT's text completion API, and I want to send the streamed response to the frontend. My backend is Python (Django) and my frontend is React. Currently I am using Django's StreamingHttpResponse, but I think it's not working:

return StreamingHttpResponse(response_data, content_type="application/json", status=status.HTTP_200_OK)

Do you guys have any idea about this?

Only the Chat Completions endpoint allows streaming, I believe.

Yes, I am using the Chat Completions API, sorry for the misunderstanding.

Is that Django 5? Are you using async / await? What is the error that you are experiencing?

ref: Request and response objects | Django documentation | Django
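For reference, StreamingHttpResponse expects an iterator and sends each item as a separate chunk. A minimal standalone sketch (not your exact view, just the shape):

    import time

    from django.http import StreamingHttpResponse


    def token_stream():
        # Stand-in for a real token source such as the OpenAI stream
        for token in ["Hello", " ", "streamed", " ", "world"]:
            time.sleep(0.2)  # simulate generation latency
            yield token


    def chat_view(request):
        # Passing a generator (not a pre-built string) is what lets
        # Django flush the body incrementally as chunks are produced
        return StreamingHttpResponse(token_stream(), content_type="text/plain")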


Yes, this is Django, and I am not using async or await. This is my code for GPT:


[details="Summary"]
def chat_with_ai(context, message, profile_name):
    if profile_name is not None:
        context.append({
            'role': 'user',
            'content': f"You will follow the conversation and respond to the queries asked by the 'user's content. You will act as the assistant.\nUser input: {message}"
        })
    else:
        context.append({
            'role': 'user',
            'content': f"Past data are past conversations with ChatGPT, which offer replies exclusively in new lines and only on previous chat outcomes.\nUser input: {message}"
        })
    try:
        # Construct the conversation history with message objects
        conversation = [{"role": msg["role"], "content": msg["content"]} for msg in context]

        # Call the OpenAI API with streaming enabled
        stream = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=conversation,
            temperature=1,
            max_tokens=1000,
            stop=None,
            stream=True
        )

        # Yield each text delta to the caller as it arrives
        for chunk in stream:
            if chunk.choices[0].delta.content is not None:
                yield chunk.choices[0].delta.content
    except Exception as e:
        # A generator's return value is discarded by the caller's join,
        # so just log the error and stop the stream here
        print(f"An error occurred: {e}")
        return

[/details]

When I print the response text, it comes through streamed in the terminal, but when I hook the generator up to the APIView and test in Postman, the response does not come through in a streamed way.


Does Postman display the response text as a single block?


Yes, it is displaying the response as a single block, like this:
{
    "status": true,
    "response": [
        {
            "role": "user",
            "content": "count 1 to 10",
            "created_at": "2024-02-01T09:50:17.307313"
        },
        {
            "role": "user",
            "content": "count 1 to 10"
        },
        {
            "role": "assistant",
            "content": "Sure! I can count from 1 to 10 for you. Here you go:\n\n1, 2, 3, 4, 5, 6, 7, 8, 9, 10.\n\nIs there anything else I can assist you with?"
        },
        {
            "role": "user",
            "content": "count 1 to 10"
        },
        {
            "role": "assistant",
            "content": "Sure! Here's the count from 1 to 10:\n\n1, 2, 3, 4, 5, 6, 7, 8, 9, 10.\n\nIs there anything else you would like me to do?"
        },
        {
            "role": "user",
            "content": "count 1 to 10"
        },
        {
            "role": "assistant",
            "content": "Sure! I can count from 1 to 10 for you. Here you go:\n\n1, 2, 3, 4, 5, 6, 7, 8, 9, 10.\n\nIs there anything else I can assist you with?"
        },
        {
            "role": "user",
            "content": "count 1 to 10"
        },
        {
            "role": "assistant",
            "content": "Certainly! I am here to assist you. Let's count from 1 to 10:\n\n1, 2, 3, 4, 5, 6, 7, 8, 9, 10.\n\nIs there anything else you would like me to do?"
        },
        {
            "role": "user",
            "content": "count 1 to 10"
        },
        {
            "role": "user",
            "content": "You will follow the conversation and respond to the queries asked by the 'user's content. You will act as the assistant.\nUser input: {'role': 'user', 'content': 'count 1 to 10', 'created_at': '2024-02-01T09:50:17.307313'}"
        },
        {
            "role": "assistant",
            "content": "Certainly! I can count from 1 to 10 for you. Here you go:\n\n1, 2, 3, 4, 5, 6, 7, 8, 9, 10.\n\nIs there anything else I can assist you with?"
        }
    ]
}

My API view code looks like this:


class ChatGptView(APIView):
    permission_classes = [IsAuthenticated]

    def post(self, request):
        data = request.data
        conversation_history = []
        user_prompt = data.get('user_prompt')
        profile_id = data.get('profile_id')
        user_id = request.user.id
        regenerate = data.get('regenerate', False)

        # Validate the request
        try:
            profile_data = Profile.objects.get(id=profile_id)
        except Profile.DoesNotExist:
            return error_response(error_message="Profile not found", status=status.HTTP_400_BAD_REQUEST)

        user_input = None
        if user_prompt is not None:
            user_input = {"role": "user", "content": user_prompt}
            conversation_history.append(user_input)
            save_chat_to_mongodb(profile_data, user_input, user_id, regenerate)

        # Prepend the recent chat history, stripping timestamps
        recent_chats = get_recent_chats(int(profile_id))
        old_chats = recent_chats.get('chat', [])
        old_chats = [{k: v for k, v in item.items() if k != 'created_at'} for item in old_chats]
        if old_chats:
            conversation_history.extend(old_chats)

        profile_name = f"{profile_data.first_name} {profile_data.last_name}" if profile_data.last_name else profile_data.first_name

        # Run the chat_with_ai generator and join the streamed chunks
        # into the full response text
        ai_response_generator = chat_with_ai(conversation_history, user_input, profile_name)
        ai_response = ''.join(ai_response_generator)

        assistant_content = ''
        if ai_response:
            if isinstance(ai_response, str):
                assistant_content = ai_response
            elif isinstance(ai_response, dict):
                assistant_content = ai_response.get('content', '')
            else:
                assistant_content = str(ai_response)
            assistant = {"role": "assistant", "content": assistant_content}
            save_chat_to_mongodb(profile_data, assistant, user_id, regenerate)
        conversation_history.append({"role": "assistant", "content": assistant_content})

        response_data = json.dumps({"status": True, "response": conversation_history})

        return StreamingHttpResponse(response_data, content_type="text/event-stream", status=status.HTTP_200_OK)

As far as I understand, there is no problem: Postman will read the response in full before displaying it. That is the usual behavior when the response is chunked.

ref: Chunked transfer encoding - Wikipedia
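If you want to see the chunks arrive one by one, a small Python client can help, since it reads the body incrementally instead of buffering it the way Postman does. A quick sketch (the URL, payload, and auth header are placeholders for your actual endpoint):

    import requests

    with requests.post(
        "http://localhost:8000/api/chat/",  # placeholder endpoint
        json={"user_prompt": "count 1 to 10", "profile_id": 1},
        headers={"Authorization": "Bearer <token>"},  # placeholder auth
        stream=True,  # read the body as it arrives instead of buffering
        timeout=60,
    ) as resp:
        for chunk in resp.iter_content(chunk_size=None, decode_unicode=True):
            # Each iteration prints whatever the server has flushed so far
            print(chunk, end="", flush=True)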

So is this correct? The response is going out as a stream? If I send that response to the frontend, will they get it as streamed data?

I tried to make it work with a browser, with no luck; see networking - JavaScript read of stream/chunks - force single chunk per read() - Stack Overflow

An alternative is to use WebSockets.
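With Django, that would typically mean Django Channels. A minimal synchronous consumer might look like this (a sketch, assuming Channels is installed and routed, and reusing the chat_with_ai generator posted earlier in this thread):

    import json

    from channels.generic.websocket import WebsocketConsumer


    class ChatConsumer(WebsocketConsumer):
        def connect(self):
            self.accept()

        def receive(self, text_data=None, bytes_data=None):
            data = json.loads(text_data)
            # Forward each streamed token to the client as its own frame;
            # chat_with_ai is the generator from earlier in this thread
            for token in chat_with_ai([], data.get("user_prompt"), None):
                self.send(text_data=token)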