How to use embeddings csv to compare a user question?

llane · March 16, 2023, 5:45pm

I’m trying to build a Netlify function that uses javascript + an embeddings.csv I created to answer a users questions on my documentation site. I’m able to get the API to respond to me with an answer, but it doesn’t successfully pick the right article/embedding for context. It keeps picking the first article and returns “similarity” as NaN.

Does anyone have a straightforward approach to this use case? I can get it to work locally in python but Netlify doesn’t accept python serverless functions yet. I think my issue is in the cosineSimilarity or createContext function.

My CSV has columns: text, n_tokens, embeddings.

The embeddings column has data like this: [-0.01648879051208496, -0.003521612612530589, 0.012298344634473324, ...]

Thanks!

const { Configuration, OpenAIApi } = require("openai");
const configuration = new Configuration({
  apiKey: process.env.OPENAI_API_KEY,
});
const openai = new OpenAIApi(configuration);

const Papa = require("papaparse");
const axios = require("axios");

async function getEmbeddings() {
    const response = await axios.get("https://my-site-url/embeddings/embeddings.csv");
    const embeddings = Papa.parse(response.data, {
      header: true,
      dynamicTyping: true,
      skipEmptyLines: true,
    }).data;
    return embeddings;
  }
  

// function to calculate cosine similarity between two vectors
function cosineSimilarity(a, b) {
  const dotProduct = a.reduce((acc, val, i) => acc + val * b[i], 0);
  const normA = Math.sqrt(a.reduce((acc, val) => acc + val * val, 0));
  const normB = Math.sqrt(b.reduce((acc, val) => acc + val * val, 0));
  if (normA === 0 || normB === 0) {
    return 0;
  }
  return dotProduct / (normA * normB);
}


  // create a context for a queestion using the most similar article

  function createContext(question, embeddings) {
    // Calculate the similarity between the question and each article
    const similarities = embeddings.map((embedding) => {
      const articleVector = embedding.embeddings.split(',').map((x) => parseFloat(x));
      const words = embedding.text.split(' ');
      const questionVector = question.split(' ').map((word) => {
        const index = words.indexOf(word);
        if (index === -1) {
          return 0;
        } else {
          return articleVector[index];
        }
      });
      const similarity = cosineSimilarity(articleVector, questionVector);
      return { article: embedding.text, similarity };
    });
  
    // Sort the similarities in descending order by the similarity score
    similarities.sort((a, b) => b.similarity - a.similarity);
  
    // Return the top 1 article
    return {
      article: similarities[0].article.substring(0, 1500),
      similarity: similarities[0].similarity,
    };
  }

async function handler(event) {
    try {
        const embeddings = await getEmbeddings();

        const userQuestion = event.queryStringParameters.question || 'What is Pachw?'

        if (embeddings === undefined || embeddings.length === 0 ) {
            return { statusCode: 500, body: "Embeddings not found" }
        }
        
        let context = createContext(userQuestion, embeddings)
        
        const prompt = `Answer the question using the context. Question:${userQuestion}\n Context:${context.article} Similiarity: ${context.similarity}`;
        console.log("prompt", prompt)

        const response = await openai.createCompletion({
            model: "text-davinci-003",
            prompt: prompt,
            temperature: 0,
            max_tokens: 200,
        });

        console.log("response", response)

        return {
            statusCode: 200,
            body: JSON.stringify({ message: response.data.choices[0].text,
            prompt: prompt,
          }),
        }
    } catch (error) {
        return { statusCode: 500, body: error.toString() }
    } 
}

module.exports = { handler }

Topic		Replies	Views
Bad request Error... how can I fix it? API	5	2648	October 18, 2023
Creating a support chat bot for my business API	4	3727	December 18, 2023
Is there any sample code to split a json file into smaller chunks? API	11	13916	October 26, 2023
Send CSV file for use in Chat Completion? API	19	25072	December 13, 2023
Web Q&A embeddings - turorial API embeddings	5	1393	February 15, 2024

How to use embeddings csv to compare a user question?

Related topics