The documentation only contains the code for the content filter in Python. I’ve rewritten it in JavaScript. I tested it and it seems to work. It would still be nice if someone could take a look at it and let me know if it’s correct.
One difference between our implementations is that you return a boolean rather than the label itself. That works, but some people may want to react differently depending on whether the content is safe, sensitive, or unsafe, so I would return the actual label instead. That way you can tailor the response sent to the front end. For example:
const label = await contentFilter(prompt);
if (label == "2") {
res.status(200).json({
safe: false,
result:
"This text is unsafe. This means that the text contains profane language, prejudiced or hateful language, something that could be NSFW, or text that portrays certain groups/people in a harmful manner.",
});
return;
}
if (label == '1') {
// something else
}
exports.gptContentFilter = async (content, uid) => {
...
...
const prompt = "<|endoftext|>" + content + "\n--\nLabel:";
const settings = {
prompt,
temperature: 0.0,
max_tokens: 1,
top_p: 0,
frequency_penalty: 0,
presence_penalty: 0,
logprobs: 10,
user: uid,
};
const url = "https://api.openai.com/v1/engines/content-filter-alpha/completions";
const request = {
method: "POST",
headers: {
Authorization: `Bearer KEY`,
"Content-Type": "application/json",
},
body: JSON.stringify(settings),
redirect: "follow",
};
let outputLabel;
const toxicThreshold = -0.355;
try {
const gptCall = await fetch(url, request);
const response = await gptCall.json();
outputLabel = parseInt(response["choices"][0]["text"]);
// If the filter returns 2, accept this outcome only if its logprob is greater than -0.355.
if (outputLabel === 2) {
const logprobs = response["choices"][0]["logprobs"]["top_logprobs"][0];
if (logprobs[2] < toxicThreshold) {
// set outputLabel to whichever of 0 or 1 has a logprob closer to 0.
const logprop0 = logprobs[0];
const logprop1 = logprobs[1];
// If both "0" and "1" have probabilities, set the output label to whichever is most probable
outputLabel = logprop0 >= logprop1 ? 0 : 1;
// If only one of them is found, set output label to that one
if (logprop0) outputLabel = 0;
if (logprop1) outputLabel = 1;
}
// if the most probable token is none of "0", "1", or "2" this should be set as unsafe
}
// if the most probable token is none of "0", "1", or "2" this should be set as unsafe
if (![0, 1, 2].includes(outputLabel)) outputLabel = "2";
} catch (error) {
outputLabel = 404;
}
return outputLabel;
};