Here’s what doesn’t work correctly. I’m making RESTful calls to diagnose everything behind the SDK, which has no Python example:
- Adding file attributes as metadata returns a vector store file object without the attributes that were just sent.
This is what the vector store search API returns when filtering, with “eq” as the filter type, on the attribute key:value that I set on a file in the vector store:
{
"object": "vector_store.search_results.page",
"search_query": [
"a placeholder text"
],
"data": [
{
"file_id": "file-1234",
"filename": "placeholder.txt",
"score": 0.5090088910904553,
"attributes": {
"filemeta1": "filevalue1"
},
"content": [
{
"type": "text",
"text": "This is the start of file contents.\r\nplaceholder.txt is empty except for this message.\r\n(You're probably supposed to be doing something else other than looking here...)"
}
]
}
],
"has_more": false,
"next_page": null
}
The file’s metadata comes back even without a filter.
So the SDK, of whatever undocumented version you have, is shuffling the data object around on you to fit its type.
Here’s some Python code, just improvised, to get the JSON response from the endpoint.
import os
import httpx
def search_vs(id, query="a placeholder text", max=1, filters=None) -> dict:
    """
    RESTful vector store search, with optional filter parameters on attributes.

    Args:
        id: ID of the vector store to search; interpolated into the URL path.
        query: Search query text, sent as "query".
        max: Maximum number of results to request, sent as "max_num_results".
        filters: Optional attribute filter sent as "filters", for example
            {"key": "filemeta1", "type": "eq", "value": "filevalue1"}.
            When None (the default) no "filters" field is sent at all.

    Returns:
        The decoded JSON body of the search response.

    Raises:
        EnvironmentError: If OPENAI_API_KEY is unset or empty.
        httpx.HTTPStatusError: If the API responds with an error status.
    """
    # NOTE: `id` and `max` shadow builtins inside this function; the names are
    # kept because callers may pass them as keyword arguments.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise EnvironmentError("OPENAI_API_KEY environment variable is not set")

    url = f"https://api.openai.com/v1/vector_stores/{id}/search"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    body = {
        "query": query,
        "max_num_results": max,
    }
    # Only include the filter when the caller supplied one, so the default
    # call sends exactly the same payload as before.
    if filters is not None:
        body["filters"] = filters

    with httpx.Client(timeout=20.0) as client:
        response = client.post(url, headers=headers, json=body)
        response.raise_for_status()
        return response.json()
# Example invocation: search one vector store and print the decoded JSON reply.
# The ID below is presumably a placeholder; substitute a real vector store ID.
id = "vs_1234"
response = search_vs(id) # optionally pass query= and max= to override the defaults
print(response)
…and here is how I set the attributes on a file, just one key/value.
def add_file_meta(id, file_id, key="filemeta1", value="filevalue1") -> dict:
    """
    Set attributes on a vector store's file (not the file in storage).

    Sends a single key/value pair as the "attributes" object of the request.

    Args:
        id: ID of the vector store; interpolated into the URL path.
        file_id: ID of the file within that vector store.
        key: Attribute name to set.
        value: Attribute value to set.

    Returns:
        The decoded JSON body of the response.

    Raises:
        EnvironmentError: If OPENAI_API_KEY is unset or empty.
        httpx.HTTPStatusError: If the API responds with an error status.
    """
    # NOTE: `id` shadows the builtin inside this function; the name is kept
    # because callers may pass it as a keyword argument.
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        raise EnvironmentError("OPENAI_API_KEY environment variable is not set")

    url = f"https://api.openai.com/v1/vector_stores/{id}/files/{file_id}"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    body = {"attributes": {key: value}}

    with httpx.Client(timeout=10.0) as client:
        response = client.post(url, headers=headers, json=body)
        response.raise_for_status()
        return response.json()
You’ve got to upload the file to storage properly, and that upload doesn’t support any metadata or attributes.
Then attach the file ID to a vector store; that’s where you set the attributes that can be filtered on.