Hello,
I will paste the code here. The messages are originally in Bulgarian, but I have translated them into English here so that you can follow them.
# -*- coding: utf-8 -*-
import openai
import pandas as pd
# Set the API key on the `openai` module; the `openai.chat.completions` call
# in this file goes through the module-level client.
# NOTE(review): the key is hard-coded in source (redacted here) - consider
# loading it from an environment variable instead; confirm with the owner.
openai.api_key = '***********'
# Closed set of labels the model is allowed to answer with. Order matters only
# for readability; membership is what the validation code checks.
categories = [
    # Bulgaria
    "Elections",
    "Bulgaria > Politics",
    "Bulgaria > Economy",
    "Bulgaria > Society",
    "Bulgaria > Justice",
    "Bulgaria > Crime",
    "Bulgaria > Incidents",
    "Bulgaria > Culture",
    # Europe
    "Europe > Politics",
    "Europe > Economy",
    "Europe > Society",
    "Europe > Justice",
    "Europe > Crime",
    "Europe > Incidents",
    "Europe > Culture",
    # Rest of the world
    "World > USA and Canada",
    "World > Russia",
    "World > Middle East",
    "World > Asia",
    "World > Latin America",
    "World > China",
    "War",
    # Sport
    "Sport > Football",
    "Sport > Basketball",
    "Sport > Volleyball",
    "Sport > Rugby",
    "Sport > Motor Sports",
    "Sport > Baseball",
    "Sport > Others",
    # Lifestyle
    "Lifestyle > Fashion",
    "Lifestyle > Gossip",
    "Lifestyle > Curious",
    "Lifestyle > Recipes",
    # Science and technology
    "Science and Technology > IT",
    "Science and Technology > Space",
    "Science and Technology > Artificial Intelligence",
    "Science and Technology > Others",
    # Health
    "Health",
]
def get_openai_response(content, retry=False):
    """Ask the chat model to pick one category for a news headline.

    Args:
        content: Headline text to classify. Non-string values are converted
            with ``str()`` before being sent.
        retry: ``True`` on the second attempt. A stricter instruction is
            appended to the system prompt and no further retry is made.

    Returns:
        The canonical entry from ``categories`` when the model's reply matches
        one (ignoring case, surrounding whitespace/quotes and a trailing
        period). If the reply still does not match after one retry, the
        cleaned reply text is returned as-is. ``None`` if the request raised.
    """
    try:
        # Build the category list from `categories` so the prompt can never
        # drift out of sync with the list used for validation below.
        system_prompt = (
            "You are an assistant who needs to categorize news headlines "
            "according to the following categories: "
            + ", ".join(categories)
            + ". You are not allowed to change the categories I have given you. "
            "Answer only with the most appropriate category for the headline. "
            "Here are some guidelines for the categories - "
            "Elections – news about elections in Bulgaria, "
            "Bulgaria > Politics – political news about politics and politicians in Bulgaria, "
            "Bulgaria > Economy – economic news about the economy of Bulgaria, "
            "Bulgaria > Society – social news about society in Bulgaria, "
            "Bulgaria > Justice – justice news about justice in Bulgaria, "
            "Bulgaria > Crime – criminal news about Bulgaria, "
            "Bulgaria > Incidents – incidents and accidents in Bulgaria, "
            "Bulgaria > Culture – cultural news about Bulgaria, "
            "Europe > Politics – news about European politics and politicians in Europe, "
            "Europe > Economy - news about the economy of Europe, "
            "Europe > Society – social news about society in Europe, "
            "Europe > Justice – justice news about justice in Europe, "
            "Europe > Crime – criminal news about Europe, "
            "Europe > Incidents – incidents and accidents in Europe, "
            "Europe > Culture – cultural news about Europe, "
            "World > USA and Canada – news about America and Canada, "
            "statements by American and Canadian politicians, "
            "World > Russia – news about Russia, statements by Russians, "
            "World > Middle East – news about countries in the Middle East, "
            "World > Asia – news about Asia, "
            "World > Latin America – news about Latin America, "
            "World > China – news about China, "
            "War – war news about the whole world and all military conflicts and actions"
        )
        if retry:
            system_prompt += (
                " Be sure to follow the categories I have given you, "
                "I do not want to receive an answer different from them."
            )

        messages = [
            {"role": "system", "content": system_prompt},
            # The API expects text; Excel cells may come back as numbers.
            {"role": "user", "content": str(content)},
        ]
        response = openai.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=messages,
            max_tokens=20,
            # Classification should be repeatable, not creative.
            temperature=0,
            # Bound each request so one stalled connection cannot hold up
            # the whole batch.
            timeout=60,
        )

        # `content` can be None; treat that as an empty (invalid) reply.
        reply = response.choices[0].message.content or ""
        # Tolerate cosmetic noise around an otherwise correct answer.
        cleaned = reply.strip().strip("\"'").rstrip(".").strip()
        canonical = {name.casefold(): name for name in categories}
        category = canonical.get(cleaned.casefold(), cleaned)

        if category not in categories and not retry:
            print(f"Error! The category is {category}, retrying!")
            return get_openai_response(content, retry=True)
        return category
    except Exception as e:
        # Deliberate best-effort boundary: one failed headline must not abort
        # a long batch run, so report it and let the caller store None.
        print(f"An error occurred: {e}")
        return None
def process_excel(file_path):
    """Categorize every headline in an Excel sheet and save the result in place.

    Reads ``file_path``, sends each non-empty ``title`` cell to
    ``get_openai_response`` and stores the answer in the ``categorygpt``
    column, then overwrites the same file (without the index column).

    Args:
        file_path: Path of the workbook to read and overwrite.

    Raises:
        KeyError: If the sheet has no ``title`` column.
    """
    df = pd.read_excel(file_path)

    # Make sure the output column exists with object dtype before the loop, so
    # string categories are never written into a float (NaN-filled) column.
    if 'categorygpt' in df.columns:
        df['categorygpt'] = df['categorygpt'].astype(object)
    else:
        df['categorygpt'] = None

    # Identical headlines get the same answer without another API call.
    # Failed lookups (None) are not remembered, so they are tried again.
    answered = {}
    for index, content in df['title'].items():
        if pd.isna(content):
            continue
        response = answered.get(content)
        if response is None:
            response = get_openai_response(content)
            if response is not None:
                answered[content] = response
        print(f"Response for row {index}: {response}")
        df.at[index, 'categorygpt'] = response

    df.to_excel(file_path, index=False)
    print(f"Updated Excel file saved to {file_path}")
if __name__ == "__main__":
    # Script entry point: categorize the workbook at the configured path.
    file_path = '***************************'
    process_excel(file_path=file_path)
It started working relatively well like this, although it still makes mistakes. I ran it on a 50k-row file: it started at around 40 rows per minute, but after I left it running overnight it now processes only about 1 row per minute. Can you advise me on that too?