ModuleNotFoundError: No module named 'openai.embeddings_utils' in new Python library

I went a bit further and wrote replacement functions for doing embeddings.

More significantly, I

  • demonstrate sending a list of multiple strings to embed in one call;
  • show how to use the base64 encoding format to get 32-bit floats from the API (see the short decode sketch after this list);
  • load them into a 2D numpy array of 1536-dimension vectors;
  • cast to numpy doubles for the dot-product calculation, so identical embeddings compare at exactly 1.0;
  • also return the response headers, giving you access to the current token rate limits, etc.
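
As a quick standalone illustration of that base64 decode step (here I fabricate a three-float string in place of a real API value, so the snippet runs on its own):

import base64
import numpy as np

# Stand-in for one item["embedding"] string from a base64-format response.
b64 = base64.b64encode(np.array([0.1, 0.2, 0.3], dtype=np.single).tobytes()).decode()

# The decode step itself: base64 string -> raw bytes -> float32 vector.
vector = np.frombuffer(base64.b64decode(b64), dtype=np.single)
print(vector)  # [0.1 0.2 0.3]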

Below are the code blocks with no explanation in between, so you should be able to select and copy it all.

"""embed util @_j;  future: practical initial combining or chunking
call_embed for OpenAI embedding. list -> numpy array of multiple vectors"""
import asyncio
from openai import AsyncOpenAI
import base64
import numpy as np
import tiktoken
import json


def token_count(string_list) -> int:
    total = 0
    tik = tiktoken.get_encoding("cl100k_base")
    for i_str in string_list:
        total += len(tik.encode(i_str))
    return total


async def call_embed(em_input) -> tuple[np.ndarray, dict, dict]:
    """OpenAI ada embeddings - returns tuple[2D array, json w/o data, headers]"""
    client = AsyncOpenAI(timeout=30, max_retries=4)
    try:
        em_api = await client.embeddings.with_raw_response.create(
            model="text-embedding-ada-002",  # may need Azure deployment name
            input=em_input,
            encoding_format="base64",
        )
    except Exception as e:
        print(f"Embeddings failure {e}")
        raise
    em_dict = em_api.http_response.json()
    em_ndarray = np.empty((len(em_dict["data"]), 1536), dtype=np.single)
    for i, item in enumerate(em_dict["data"]):
        em_bytes = base64.b64decode(item["embedding"])
        vector = np.frombuffer(em_bytes, dtype=np.single)
        if vector.size != 1536:  # validate before writing into the fixed-width array
            raise ValueError(f"embedding {i} has {vector.size} dimensions, expected 1536")
        em_ndarray[i] = vector
    em_meta = {
        **em_dict,
        "data": [{**item, "embedding": "..."} for item in em_dict["data"]],
    }
    return em_ndarray, em_meta, dict(em_api.headers.items())


def cosine_similarity(asingle, bsingle) -> np.double:
    """return the normalized dot product of two vectors"""
    a = asingle.astype(np.double)
    b = bsingle.astype(np.double)
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


def demo_report(string_list, vector_list):
    print(" == Sample from vectors returned ==")
    for i in range(len(vector_list)):
        print(i, vector_list[i][768:771].tolist())

    print("\n == Cosine similarity and vector comparison of all inputs ==")
    for i in range(len(vector_list)):
        for j in range(i + 1, len(vector_list)):
            similarity = cosine_similarity(vector_list[i], vector_list[j])
            identity = np.array_equal(vector_list[i], vector_list[j])
            print(f'{i}:"{string_list[i][:30]}" <==> {j}:"{string_list[j][:30]}":')
            print(f"   {similarity:.16f} - identical: {identity}")
def demo():
    input_list = [
        "Jimmy loves his cute kitten",
        "How many deaths in WWII Normandy invasion?",
    ]
    input_size = token_count(input_list)
    if input_size <= 8192:
        try:
            embed, metadata, headers = asyncio.run(call_embed(input_list))
        except Exception as e:
            print(f"call_embed function failed, {e}")
            raise
    else:
        print("Too many tokens to send!")
        raise ValueError("input exceeds the 8192-token limit")
    print(
        f"[Total tokens for {len(input_list)} embeddings] "
        f"Counted: {input_size}; API said: {metadata['usage']['total_tokens']}\n"
    )
    # print(json.dumps(headers, indent=1))  # dump all response headers
    print("Requests remaining:", headers["x-ratelimit-remaining-requests"])
    demo_report(input_list, embed)
    # return is for later console experimentation
    return embed, metadata, headers


if __name__ == "__main__":
    demoembed, demometa, demoheaders = demo()

""" "meta" is just embeddings without the b64 data
{
  "object": "list",
  "data": [
    {
      "object": "embedding",
      "index": 0,
      "embedding": "..."
    }
  ],
  "model": "text-embedding-ada-002-v2",
  "usage": {
    "prompt_tokens": 2,
    "total_tokens": 2
  }
}
"""

Here is sample output from a run (this one used four inputs, three of them identical), so you can get a feel for the data returned and the matching:

[Total tokens for 4 embeddings] Counted: 24; API said: 24

 == Sample from vectors returned ==
0 [-0.02899613417685032, 0.029123421758413315, -0.0032346982043236494]
1 [-0.02899613417685032, 0.029123421758413315, -0.0032346982043236494]
2 [-0.02899613417685032, 0.029123421758413315, -0.0032346982043236494]
3 [0.0006744746351614594, -0.00696355989202857, -0.02934185042977333]

 == Cosine similarity and vector comparison of all inputs ==
0:"Jimmy loves his cute kitten" <==> 1:"Jimmy loves his cute kitten":
   1.0000000000000000 - identical: True
0:"Jimmy loves his cute kitten" <==> 2:"Jimmy loves his cute kitten":
   1.0000000000000000 - identical: True
0:"Jimmy loves his cute kitten" <==> 3:"How many deaths in WWII Norman":
   0.6947125420475460 - identical: False
1:"Jimmy loves his cute kitten" <==> 2:"Jimmy loves his cute kitten":
   1.0000000000000000 - identical: True
1:"Jimmy loves his cute kitten" <==> 3:"How many deaths in WWII Norman":
   0.6947125420475460 - identical: False
2:"Jimmy loves his cute kitten" <==> 3:"How many deaths in WWII Norman":
   0.6947125420475460 - identical: False
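
Since the headers come back with every call, you can also watch your rate-limit budget drain between requests. A tiny helper (my own addition; it just filters for the documented x-ratelimit-* header names):

def show_rate_limits(headers: dict) -> None:
    """Print whichever x-ratelimit-* headers the API returned."""
    for key, value in headers.items():
        if key.lower().startswith("x-ratelimit-"):
            print(f"{key}: {value}")

For example, show_rate_limits(demoheaders) after running the demo.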

There’s some more “utils” to replace, but they seem like teaching aids.

What to write next for the forum depends a lot on the application, but you can imagine a fast database object for prompt or HyDE retrievals…

"""class DatabaseObject:  # something to think about
    def __init__(self):
        pass  # maybe make your database 
              # into a memory object with top match methods

    def top_n(self, match_input, db, n=5, threshold=0.85, max_tokens=2000):
        # embedding magic within a budget here
        return match_outputs"""
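
As one starting point, here is a minimal concrete sketch of that idea (the class shape, names, and in-memory list storage are all my own assumptions, and the max_tokens budget is left out):

import numpy as np

class EmbeddingDB:
    """Toy in-memory vector store holding (text, unit-vector) pairs."""

    def __init__(self):
        self.texts: list[str] = []
        self.vectors: list[np.ndarray] = []

    def add(self, text: str, vector: np.ndarray) -> None:
        v = vector.astype(np.double)
        self.texts.append(text)
        self.vectors.append(v / np.linalg.norm(v))  # store normalized

    def top_n(self, query_vector: np.ndarray, n: int = 5, threshold: float = 0.85):
        """Return up to n (similarity, text) pairs scoring above the threshold."""
        q = query_vector.astype(np.double)
        q /= np.linalg.norm(q)  # safe to modify in place: astype made a copy
        scored = [(float(v @ q), t) for v, t in zip(self.vectors, self.texts)]
        scored.sort(reverse=True)  # highest similarity first
        return [(s, t) for s, t in scored[:n] if s >= threshold]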