Hello, I fine-tuned an ada model to classify documents into two groups: simple and normal. The problem is that in some cases, the model does not assign either of the two labels. Instead, the model returns something like the output below. Could anyone help me understand why the model returns an unrelated token (such as “The” or “of”) as a completion when the model is trained on data with only two labels?
openai.Completion.create(model=fine_tuned_model, prompt=prompt + ' \n\n###\n\n', max_tokens=1, logprobs=3, temperature=0)
2it [00:00, 4.60it/s]{
“id”: “cmpl-7ckHxoPDqhHM8NJs2A81kTlIhcOnB”,
“object”: “text_completion”,
“created”: 1689468753,
“model”: “ada:ft-the-school-2023-07-08-19-26-00”,
“choices”: [
{
“text”: " simple",
“index”: 0,
“logprobs”: {
“tokens”: [
" simple"
],
“token_logprobs”: [
-0.21027027
],
“top_logprobs”: [
{
" simple": -0.21027027,
" of": -5.2638874,
" normal": -1.7454323
}
],
“text_offset”: [
5008
]
},
“finish_reason”: “length”
}
],
“usage”: {
“prompt_tokens”: 1077,
“completion_tokens”: 1,
“total_tokens”: 1078
}
}
{
“id”: “cmpl-7ckHxS8qLVL2SluQGPKd4YPowQ9ik”,
“object”: “text_completion”,
“created”: 1689468753,
“model”: “ada:ft-the-school-2023-07-08-19-26-00”,
“choices”: [
{
“text”: " normal",
“index”: 0,
“logprobs”: {
“tokens”: [
" normal"
],
“token_logprobs”: [
-0.48004854
],
“top_logprobs”: [
{
" ": -4.862535,
" simple": -1.0338054,
" normal": -0.48004854
}
],
“text_offset”: [
5008
]
},
“finish_reason”: “length”
}
],
“usage”: {
“prompt_tokens”: 1156,
“completion_tokens”: 1,
“total_tokens”: 1157
}
}
3it [00:00, 3.70it/s]{
“id”: “cmpl-7ckHy0TBZ4tZ2NdlgXDtgVjJCMakp”,
“object”: “text_completion”,
“created”: 1689468754,
“model”: “ada:ft-the-school-2023-07-08-19-26-00”,
“choices”: [
{
“text”: " simple",
“index”: 0,
“logprobs”: {
“tokens”: [
" simple"
],
“token_logprobs”: [
-0.59430796
],
“top_logprobs”: [
{
" ": -3.1227753,
" simple": -0.59430796,
" normal": -1.0200912
}
],
“text_offset”: [
5008
]
},
“finish_reason”: “length”
}
],
“usage”: {
“prompt_tokens”: 1040,
“completion_tokens”: 1,
“total_tokens”: 1041
}
}
{
“id”: “cmpl-7ckHyUD8C6hbiHSUaA9Xw0WsaPpcq”,
“object”: “text_completion”,
“created”: 1689468754,
“model”: “ada:ft-the-wharton-school-2023-07-08-19-26-00”,
“choices”: [
{
“text”: " ",
“index”: 0,
“logprobs”: {
“tokens”: [
" "
],
“token_logprobs”: [
-0.6145109
],
“top_logprobs”: [
{
" ": -0.6145109,
" simple": -3.308858,
" The": -3.3845778
}
],
“text_offset”: [
5008
]
},
“finish_reason”: “length”
}
],
“usage”: {
“prompt_tokens”: 1252,
“completion_tokens”: 1,
“total_tokens”: 1253
}
}
{
“id”: “cmpl-7cjyIk3ggfkeJ69K0ZaAlsrWrm98D”,
“object”: “text_completion”,
“created”: 1689467534,
“model”: “ada:ft-the-school-2023-07-08-19-26-00”,
“choices”: [
{
“text”: " ",
“index”: 0,
“logprobs”: {
“tokens”: [
" "
],
“token_logprobs”: [
-0.63914996
],
“top_logprobs”: [
{
" ": -0.63914996,
" simple": -3.1395473,
" The": -3.417498
}
],
“text_offset”: [
5008
]
},
“finish_reason”: “length”
}
],
“usage”: {
“prompt_tokens”: 1252,
“completion_tokens”: 1,
“total_tokens”: 1253
}
}
Thank you in advance!