Hi! I am trying to build a workflow that iterates over contents from an Excel file, which uses assistant api retrieval. It is always processing the first row and I am not sure what is wrong. Thanks to anyone who helps.
import os
import pandas as pd
import openai
import time
from openai import OpenAI
from dotenv import load_dotenv
# Load the OpenAI API key
load_dotenv('your_env_file.env')
api_key = os.getenv('YOUR_ENV_VARIABLE_FOR_API_KEY')
openai.api_key = api_key # Make sure to set the API key for the openai package
# Read contents from an Excel file
excel_path = 'path_to_your_excel_file.xlsx'
df = pd.read_excel(excel_path)
content_column = 'your_content_column_name' # Replace with the name of your column
# Prepare a column in the DataFrame to store responses
df['Assistant_Responses'] = ''
# Upload a file with an "assistants" purpose
client = OpenAI()
file = client.files.create(
file=open("path_to_your_file.json", "rb"),
purpose='assistants'
)
print(file.id)
# Iterate over each content in the DataFrame
for index, row in df.iterrows():
user_input = row[content_column]
print(f"Processing row {index + 1}/{len(df)}...")
# Create a thread
thread = client.beta.threads.create()
message = client.beta.threads.messages.create(
thread_id=thread.id,
role="user",
content=user_input,
file_ids=[file.id]
)
run = openai.beta.threads.runs.create(
thread_id=thread.id,
assistant_id="your_assistant_id"
)
while run.status != 'requires_action':
time.sleep(1)
run = openai.beta.threads.runs.retrieve(
thread_id=thread.id,
run_id=run.id
)
try:
messages = openai.beta.threads.messages.list(
thread_id=thread.id
)
# Assuming the last message is the response
if messages.data:
response = messages.data[-1].content
df.at[index, 'Assistant_Responses'] = response
except Exception as e:
print(f"Error processing row {index + 1}: {e}")
# Save the updated DataFrame back to the Excel file
df.to_excel(excel_path, index=False)
print("Processing complete.")
import os
import openai
import time
from openai import OpenAI
from dotenv import load_dotenv
load_dotenv('untitled.env')
api_key = os.getenv('OPENAI_API_KEY')
print(api_key)
client = OpenAI()
#Loop through each line
import pandas as pd
# Function to process queries and get responses
def process_user_queries(client, excel_path, content_column, file_id, assistant_id, instructions):
# Read contents from an Excel file
df = pd.read_excel(excel_path)
# Prepare a column in the DataFrame to store responses if not already present
if 'Assistant_Responses' not in df.columns:
df['Assistant_Responses'] = ''
# Adjust the start row if it's beyond the range of the DataFrame
start_row = 467
# Iterate over each content in the DataFrame starting from the start_row
for index, row in df.iloc[start_row:].iterrows():
user_query = row[content_column]
print(f"Processing row {index + 1}/{len(df)}: {user_query}")
# Create a new thread for each query
print(f"Creating thread for row {index + 1}...")
thread = client.beta.threads.create()
print(f"Sending message for row {index + 1}...")
message = client.beta.threads.messages.create(
thread_id=thread.id,
role="user",
content=user_query,
file_ids=[file_id]
)
# Create a run with specific instructions
print(f"Creating run for row {index + 1}...")
run = client.beta.threads.runs.create(
thread_id=thread.id,
assistant_id=assistant_id,
instructions=instructions
)
# Wait for the run to complete
print(f"Waiting for run to complete for row {index + 1}...")
while run.status != "completed":
time.sleep(3) # Sleep to avoid hitting API limits
run = client.beta.threads.runs.retrieve(
thread_id=thread.id,
run_id=run.id
)
print(run.status)
# Retrieve and store all messages from the thread
messages = client.beta.threads.messages.list(
thread_id=thread.id
)
full_conversation = []
if messages.data:
for msg in messages.data:
# Extract the message text and add it to the list
full_conversation.append(msg.content[0].text.value)
# Join all messages into a single string
df.at[index, 'Assistant_Responses'] = '\n'.join(full_conversation)
# Save the updated DataFrame back to the Excel file after each row is completed
df.to_excel(excel_path, index=False)
print(f"Row {index + 1} processing completed and saved.")
print("All rows processed and saved.")
# Designate file path and content to be processed
excel_path = ''
content_column = '' # Replace with the name of your column
# Replace with your actual file and assistant IDs and instructions
file_id = ''
assistant_id = ''
instructions = (""" """)
# Process the queries and get responses
process_user_queries(client, excel_path, content_column, file_id, assistant_id, instructions)