Hi! I’m trying to make an Speech-To-Text using an ESP32 board.
I’m currently modifying a non-finished library that integrates OpenAI API with Arduino code (in github me-no-dev/OpenAI-ESP32).
I modified the code to only use a function to send the audio (in chunks) to the server:
String OpenAI_AudioTranscription::file(File& audio_file, OpenAI_Audio_Input_Format f) {
if (!audio_file) {
Serial.println("Invalid file handle");
return "";
}
if (oai.getWiFiClient() == nullptr) {
Serial.println("WiFi client not set");
return "";
}
String endpoint = "audio/transcriptions";
String boundary = "----WebKitFormBoundary9HKFexBRLrf9dcpY";
String itemPrefix = "--" + boundary + "\r\nContent-Disposition: form-data; name=";
// Calcular el tamaño total del cuerpo de la petición
String reqBody = itemPrefix + "\"model\"\r\n\r\nwhisper-1\r\n";
if (prompt != NULL) {
reqBody += itemPrefix + "\"prompt\"\r\n\r\n" + String(prompt) + "\r\n";
}
if (response_format != OPENAI_AUDIO_RESPONSE_FORMAT_JSON) {
reqBody += itemPrefix + "\"response_format\"\r\n\r\n" + String(audio_response_formats[response_format]) + "\r\n";
}
if (temperature != 0) {
reqBody += itemPrefix + "\"temperature\"\r\n\r\n" + String(temperature) + "\r\n";
}
if (language != NULL) {
reqBody += itemPrefix + "\"language\"\r\n\r\n" + String(language) + "\r\n";
}
reqBody += itemPrefix + "\"file\"; filename=\"audio." + String(audio_input_formats[f]) + "\"\r\nContent-Type: " + String(audio_input_mime[f]) + "\r\n\r\n";
String reqEndBody = "\r\n--" + boundary + "--\r\n";
size_t totalLength = reqBody.length() + audio_file.size() + reqEndBody.length();
// Iniciar la conexión HTTP
HttpClient http(*oai.getWiFiClient(), "api.openai.com", 80);
http.beginRequest();
http.post("/v1/" + endpoint);
http.sendHeader("Content-Type", "multipart/form-data; boundary=" + boundary);
http.sendHeader("Authorization", "Bearer " + oai.getApiKey());
http.sendHeader("Content-Length", totalLength);
http.beginBody();
// Enviar la primera parte del cuerpo (headers del form-data)
http.write((const uint8_t*)reqBody.c_str(), reqBody.length());
// Enviar el archivo en chunks
const size_t CHUNK_SIZE = 1024; // Ajusta según la memoria disponible
uint8_t buffer[CHUNK_SIZE];
while (audio_file.available()) {
size_t bytesRead = audio_file.read(buffer, CHUNK_SIZE);
if (bytesRead > 0) {
http.write(buffer, bytesRead);
}
}
// Enviar la parte final del cuerpo
http.write((const uint8_t*)reqEndBody.c_str(), reqEndBody.length());
http.endRequest();
// Procesar la respuesta
int statusCode = http.responseStatusCode();
String result = "";
if (statusCode == 200) {
// Leer la respuesta en chunks también
const size_t READ_CHUNK_SIZE = 64; // Tamaño del buffer de lectura
char responseBuffer[READ_CHUNK_SIZE + 1];
while (http.available()) {
int bytesRead = http.read((uint8_t*)responseBuffer, READ_CHUNK_SIZE);
if (bytesRead > 0) {
responseBuffer[bytesRead] = 0; // Null terminator
result += responseBuffer;
}
}
}
else {
Serial.println("HTTP Error: " + String(statusCode));
}
http.stop();
// Procesar el JSON de respuesta
if (!result.length()) {
Serial.println("Empty result!");
return result;
}
cJSON* json = cJSON_Parse(result.c_str());
String error = getJsonError(json);
result = "";
if (error.length()) {
Serial.println(error);
}
else if (cJSON_HasObjectItem(json, "text")) {
cJSON* text = cJSON_GetObjectItem(json, "text");
result = String(cJSON_GetStringValue(text));
}
cJSON_Delete(json);
return result;
}
But I’m getting HTTP Error: -3 (??) in console.
I guess I’m making the http request not in the correct way.
Can someone help me? Thanks!!