Hi Dent,
Thank you for taking the time to reply. I don't see the \n in the top_logprobs field in the API response now, but the duplicate responses or tokens (Yes and Yes) in the top_logprobs field still prevail.
I have reproduced my problem in the code below, which you can run directly in a Jupyter notebook in a Python virtual environment where the openai package is installed and an API key is configured.
import openai
import pandas as pd
from pprint import pprint
from ast import literal_eval
# --- Load the evaluation sheet and derive the helper columns used below ---
url = "https://docs.google.com/spreadsheets/d/e/2PACX-1vTGuONBhr1yefCz619MugO9aT2ATxWcvsL41W4ZLYrIBCjdeHPXJthX8OMVrCnTpSHF6T4Cv_ujkP86/pub?gid=1605389139&single=true&output=csv"
df = pd.read_csv(url)
# 'documents' arrives as a stringified Python list; parse it back into a real list
df['documents'] = df['documents'].apply(literal_eval)
df['nr_docs_final'] = df['documents'].apply(len)
# Re-serialize to str so the list can be dropped into the prompt template verbatim
df['documents'] = df['documents'].astype(str)
# One yes/no question per row, built from the two name columns
df['questions'] = df.apply(lambda row: f"Is {row.Pref_Name} also known as {row.Alternate_Name}?", axis=1)
ANSWERS_INSTRUCTION = "Please give only Yes or No answers to the following question based on the given Context. If Context is empty return NA"
CONTEXT_TEMPLATE = "===\nContext: {context}\n===\n"
def answers(question, model, documents=None, logit_bias=None,
            max_rerank=200, max_tokens=2, print_response_flag = False,
            temperature=0.01, top_logprobs=2, stop=None, n=1 ):
    """Answer `question` via the Completions API, guided by ANSWERS_INSTRUCTION.

    Parameters
    ----------
    question : str
        The question to ask; wrapped as "Q:{question}\\nA:".
    model : str
        Completions engine name (e.g. "text-davinci-003").
    documents : str or None
        Context rendered into CONTEXT_TEMPLATE. None is treated as an
        empty context (so the "If Context is empty return NA" instruction
        can actually apply).
    logit_bias : dict or None
        Token-id -> bias map forwarded to the API; {} when None.
    max_rerank : int
        Unused; kept only for backward compatibility with the signature.
    max_tokens, temperature, top_logprobs, stop, n
        Forwarded to openai.Completion.create (top_logprobs -> logprobs).
    print_response_flag : bool
        When True, pretty-print the raw API response.

    Returns
    -------
    dict with keys "object", "completion" (response id), "top_logprobs"
    (token -> logprob for the first generated position of choice 0), and
    "answers" (cleaned completion text per choice).
    """
    prompt = f"Q:{question}\nA:"
    # Avoid a mutable default; build a fresh empty dict per call.
    logit_bias = logit_bias if logit_bias is not None else {}
    # Fixed: identity comparison (`is not None`), not `!= None` (PEP 8).
    instruction = f"Instruction: {ANSWERS_INSTRUCTION.strip()}\n\n" if ANSWERS_INSTRUCTION is not None else ""
    # Bug fix: previously documents=None rendered the literal string "None"
    # into the prompt; render an empty context instead.
    context = CONTEXT_TEMPLATE.format(context = documents if documents is not None else "")
    full_prompt = instruction + context + prompt
    # print("PROMPT:\n",full_prompt)
    ## Call openai.Completions API
    completion_result = openai.Completion.create(
        engine=model,
        prompt=full_prompt,
        logit_bias=logit_bias,
        temperature=temperature,
        n=n,
        max_tokens=max_tokens,
        stop=stop,
        logprobs=top_logprobs,
    )
    if print_response_flag:
        print("COMPLETION API RAW RESPONSE:")
        pprint(completion_result)
    # top_logprobs[0] holds the candidate distribution for the FIRST
    # generated token of the first choice.
    # NOTE(review): entries such as "Yes" and " Yes" (leading space) or
    # "Yes\n" are *distinct* BPE tokens, so apparently-duplicate answers in
    # top_logprobs are expected tokenizer behavior, not duplicated output;
    # normalize/aggregate downstream if a single "Yes" bucket is needed.
    top_prob_answers = completion_result["choices"][0]['logprobs']["top_logprobs"][0]
    result = dict(
        object="answer",
        completion=completion_result["id"],
    )
    result["top_logprobs"] = dict(top_prob_answers)
    # Strip the "A:" echo and anything after a follow-up "Q:" from each choice.
    result["answers"] = [
        item["text"].replace("A:", "").split("Q:")[0].strip()
        for item in completion_result["choices"]]
    return result
## PRINT a SAMPLE Test Point
sample_row = 0
sample_question = df['questions'].iloc[sample_row]
sample_docs = df['documents'].iloc[sample_row]
response = answers(
    question=sample_question,
    model="text-davinci-003",
    max_tokens=2,
    documents=sample_docs,
    print_response_flag=True,
    temperature=0.01,
    top_logprobs=2,
    stop=None,
    n=1,
)
print("FINAL RESPONSE:")
pprint(response)
## Get Responses for entire Column
df['openai_response'] = df.apply(
    lambda row: answers(question=row.questions, model="text-davinci-003", documents=row.documents),
    axis=1,
)
df['answers'] = df['openai_response'].apply(lambda r: r['answers'][0])
df['top_logprobs'] = df['openai_response'].apply(lambda r: r['top_logprobs'])
print(df)
Any help in removing the redundant tokens from the Completions API top_logprobs responses would be greatly appreciated.
Best Regards,
Dilip