Yes, it is possible. By the way, I'm open-sourcing a pydantic wrapper I'm developing that extends it for use with LLMs. It's not ready for PyPI yet, but you can check it out here: GitHub - nicholishen/tooldantic
!pip install -U git+https://github.com/nicholishen/tooldantic.git
Here is a snippet:
import asyncio
import json
import httpx
import openai
from bs4 import BeautifulSoup
from tooldantic import OpenAiResponseFormatBaseModel as BaseModel
client = openai.AsyncOpenAI()
class ArticleExtractor(BaseModel):
    """Use this tool to extract information from the user's articles"""

    headline: str
    summary: str


urls = [
    "https://www.cnn.com/2019/08/29/us/new-hampshire-vanity-license-plate-trnd/index.html",
    "https://www.cnn.com/2024/08/02/tech/google-olympics-ai-ad-artificial-intelligence/index.html",
]


async def get_url_content(url: str) -> str:
    """Fetch a URL and return the text of its headline and paragraph tags."""
    async with httpx.AsyncClient() as http_client:
        response = await http_client.get(url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        important_tags = ["h1", "h2", "h3", "p"]
        content = []
        for tag in important_tags:
            elements = soup.find_all(tag)
            for element in elements:
                content.append(element.get_text())
        return " ".join(content)


async def prepare_jsonl():
    """Scrape the articles and write one Batch API request per line to requests.jsonl."""
    tasks = [get_url_content(url) for url in urls]
    articles = await asyncio.gather(*tasks)
    jsonl = []
    for i, article in enumerate(articles, start=1):
        jsonl.append(
            {
                "custom_id": f"request-{i}",
                "method": "POST",
                "url": "/v1/chat/completions",
                "body": {
                    "model": "gpt-4o-mini",
                    "max_tokens": 1000,
                    "messages": [{"role": "user", "content": article}],
                    # OpenAiResponseFormatBaseModel emits the full
                    # {"type": "json_schema", ...} response_format payload
                    "response_format": ArticleExtractor.model_json_schema(),
                },
            }
        )
    with open("requests.jsonl", "w") as f:
        for line in jsonl:
            f.write(json.dumps(line) + "\n")


# Top-level await works in a notebook; in a script, use asyncio.run(prepare_jsonl()).
await prepare_jsonl()
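From there you just hand the file to the Batch API. Here's a minimal sketch of how that might look with the same async client, using the openai SDK's files.create and batches.create endpoints; error handling and polling for completion are left out.

# Sketch: upload the JSONL and start a batch job (standard openai SDK calls,
# not part of tooldantic itself).
async def submit_batch():
    batch_file = await client.files.create(
        file=open("requests.jsonl", "rb"),
        purpose="batch",
    )
    batch = await client.batches.create(
        input_file_id=batch_file.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
    )
    print(batch.id, batch.status)

await submit_batch()

Once the batch finishes, each output line's message content is a JSON string, so you can validate it straight back into the schema with ArticleExtractor.model_validate_json(content) (standard pydantic v2 validation, which the tooldantic base model inherits).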
EDIT: tooldantic can also create dynamic pydantic models from arbitrary data sources.
import json

import tooldantic

some_existing_data = {
    "headline": "The headline of the article",
    "summary": "The summary of the article",
}

MyDataModel = tooldantic.ModelBuilder(
    base_model=tooldantic.OpenAiResponseFormatBaseModel
).model_from_dict(
    some_existing_data,
    model_name="MyDataModel",
    model_description="This is a custom data model from arbitrary data",
)

print(json.dumps(MyDataModel.model_json_schema(), indent=2))
assert MyDataModel(**some_existing_data).model_dump() == some_existing_data
# {
#   "type": "json_schema",
#   "json_schema": {
#     "name": "MyDataModel",
#     "description": "This is a custom data model from arbitrary data",
#     "strict": true,
#     "schema": {
#       "type": "object",
#       "properties": {
#         "headline": {
#           "type": "string"
#         },
#         "summary": {
#           "type": "string"
#         }
#       },
#       "required": [
#         "headline",
#         "summary"
#       ],
#       "additionalProperties": false
#     }
#   }
# }
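Since model_json_schema() returns the full response_format payload shown above, you can pass it directly to a regular (non-batch) structured-output call as well. A quick sketch, assuming a synchronous client and a placeholder prompt:

import openai

# Sketch: use the dynamically built model as the response_format, then
# validate the returned JSON string back into the model.
sync_client = openai.OpenAI()
completion = sync_client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Summarize this article: ..."}],
    response_format=MyDataModel.model_json_schema(),
)
validated = MyDataModel.model_validate_json(completion.choices[0].message.content)
print(validated.model_dump())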