Yes, the problem is clear.
You're counting total tokens via len(), but a single word can map to more than one token. In general, len() is just a rough, inaccurate way of counting tokens: great for quick napkin math, bad for precision.
Try this:
import tiktoken

# tiktoken has no Tokenizer class; look up the encoding for your model instead
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-16k")

def count_tokens(text):
    return len(encoding.encode(text))
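As a quick sanity check that word counts and token counts really diverge (the sample sentence is just an illustration):

text = "Antidisestablishmentarianism is unquestionably a mouthful."
print(len(text.split()))   # 5 words
print(count_tokens(text))  # noticeably more, since rare long words split into several tokens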
def sendMssgToChatGPT(text_MSG):
    # Initialize the shared messages list if it doesn't exist yet
    global messages
    if "messages" not in globals():
        messages = []

    # Append the user message
    messages.append({"role": "user", "content": text_MSG})

    # Generate a completion
    completion = client.chat.completions.create(
        model="gpt-3.5-turbo-16k",
        messages=messages,
        max_tokens=300,
    )

    # Get the model's response
    model_response = completion.choices[0].message.content

    # NPC or chatGPT is saying this:
    print(model_response)

    # Append the assistant response
    messages.append({"role": "assistant", "content": model_response})

    # Total tokens across all message contents (note: this ignores the few
    # extra formatting tokens the API adds per message, so leave some headroom)
    total_tokens = sum(count_tokens(message.get("content", "")) for message in messages)

    # Drop the oldest messages until we're back under the limit
    # (careful: this also evicts a system prompt if you keep one at index 0)
    while total_tokens > max_tokens_limit:
        removed_message = messages.pop(0)
        total_tokens -= count_tokens(removed_message.get("content", ""))
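The snippet assumes two things that aren't shown: a client built with the v1 openai SDK (the client.chat.completions.create call is the v1 style) and a max_tokens_limit you define yourself. A minimal setup might look like this, where the 15,000 value is just an example that leaves headroom below the 16k context window:

from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
max_tokens_limit = 15000  # example value; leave room for the reply and per-message overhead

sendMssgToChatGPT("Hello! Who are you?")
sendMssgToChatGPT("And what did I just ask you?")

One more note: if you keep a system prompt, pin it at index 0 and pop from index 1 instead, so the trimming loop never throws it away.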