How to track individual usage?

Hi, I have combined the toktrack approach from @davell with the cost-details idea from Sebastian Shepard ("Handling OpenAI API Cost Retrieval Changes", at jeypop) to produce this alternative:

import json
import requests
import datetime
from dateutil.relativedelta import relativedelta
import os

import pandas as pd

import time

# --- configuration -----------------------------------------------------------
# Credentials come from the environment; both must be set before running.
openai_org_id = os.getenv('ORG_ID')
openai_api_key = os.getenv('OPENAI_API_KEY')

first_day_of_month = datetime.date(2023, 7, 1)  # start date (inclusive)
current_day = datetime.date.today()             # end date (inclusive)

# Only real HTTP headers belong here. The original snippet also carried
# "method"/"authority"/"scheme"/"path" keys copied from browser dev-tools;
# those are HTTP/2 pseudo-headers, not request headers, so they are dropped.
headers = {
    "authorization": f"Bearer {openai_api_key}",
}

# Fetch every member of the organization.
# NOTE(review): this organizations/users endpoint is not publicly documented —
# confirm it still returns {"members": {"data": [...]}} before relying on it.
users_response = requests.get(
    f"https://api.openai.com/v1/organizations/{openai_org_id}/users",
    headers=headers,
)
users_response.raise_for_status()  # fail loudly instead of a KeyError below
users = users_response.json()["members"]["data"]

df_costs = pd.DataFrame()

# The usage endpoint allows ~5 requests per minute, so pace ourselves to at
# most one request every 15 seconds to stay safely under the limit.
REQUEST_INTERVAL_S = 15

# The headers are identical for every request — build them once outside the
# loop (the original rebuilt them per iteration and included bogus
# "method"/"authority" keys, which are not real HTTP headers).
usage_headers = {
    "authorization": f"Bearer {openai_api_key}",
    "openai-organization": openai_org_id,
}

for user in users:

    id_of_user = user["user"]["id"]
    current_date = first_day_of_month

    # One request per user per day in [first_day_of_month, current_day].
    while current_date <= current_day:

        start = time.time()

        # Let requests build the query string instead of interpolating it.
        usage_response = requests.get(
            "https://api.openai.com/v1/usage",
            params={"date": str(current_date), "user_public_id": id_of_user},
            headers=usage_headers,
        )
        user_data = usage_response.json()

        # Only days with activity produce rows; combine completion usage with
        # whisper (audio) usage so both get costed later.
        if user_data['data']:
            data = user_data["data"] + user_data["whisper_api_data"]

            df = pd.DataFrame(data)
            df['local_timestamp'] = df['aggregation_timestamp'].apply(
                lambda x: datetime.datetime.fromtimestamp(x))
            # converting to openai tz — fromtimestamp() above yields naive
            # local times, assumed here to be America/Sao_Paulo
            df['system_timestamp'] = (df['local_timestamp']
                                      .dt.tz_localize('America/Sao_Paulo')
                                      .dt.tz_convert("UTC"))
            df['user'] = user["user"]["name"].lower().replace(" ", "_")
            df['email'] = user["user"]["email"]

            df_costs = pd.concat([df_costs, df])

        current_date += relativedelta(days=1)

        # Single throttling point — the original duplicated the date-advance
        # and sleep logic in both branches of an if/else.
        elapsed = time.time() - start
        if elapsed < REQUEST_INTERVAL_S:
            time.sleep(REQUEST_INTERVAL_S - elapsed)
                    
# model costs -----------------------------------------------------------------
# Prices in USD per 1K tokens ("context" = prompt side, "generated" =
# completion side). Whisper is priced per minute of audio, hence the /60 to
# convert to a per-second rate matching its usage counts.
_PRICE_TABLE = [
    ("gpt-3.5-turbo",             0.0015,     0.002),
    ("gpt-3.5-turbo-0301",        0.0015,     0.002),
    ("gpt-3.5-turbo-0613",        0.0015,     0.002),
    ("gpt-3.5-turbo-16k",         0.003,      0.004),
    ("gpt-3.5-turbo-16k-0613",    0.003,      0.004),
    ("gpt-4",                     0.03,       0.06),
    ("gpt-4-0314",                0.03,       0.06),
    ("gpt-4-0613",                0.03,       0.06),
    ("gpt-4-32k",                 0.06,       0.12),
    ("gpt-4-32k-0314",            0.06,       0.12),
    ("gpt-4-32k-0613",            0.06,       0.12),
    ("text-embedding-ada-002-v2", 0.0001,     0),
    ("text-davinci:003",          0.03,       0.12),
    ("whisper-1",                 0.006 / 60, 0),
]

model_costs = {
    name: {"context": ctx, "generated": gen}
    for name, ctx, gen in _PRICE_TABLE
}

# Long-format price frame: one row per model, columns index/context/generated.
mc = pd.DataFrame(model_costs).T.reset_index()

# cost calculation ------------------------------------------------------------
# Join each usage row to the price table on the model snapshot id, then derive
# dollar amounts (token counts are billed per 1K tokens).
df_costs = df_costs.merge(mc, left_on='snapshot_id', right_on='index', how='left')
df_costs['context_costs'] = (df_costs['n_context_tokens_total'] / 1000) * df_costs['context']
df_costs['generated_costs'] = (df_costs['n_generated_tokens_total'] / 1000) * df_costs['generated']
df_costs['total_costs'] = df_costs['context_costs'] + df_costs['generated_costs']

# saving a json file
# FIX: DataFrame.to_json() already returns a JSON string; the original wrapped
# it in json.dump(), which double-encodes the whole document into one quoted
# string. Write the string directly to get valid JSON. The filename also now
# uses current_day (the intended end of the range) instead of the leftover
# loop variable current_date, which ends one day past it.
with open(f"openai_costs_{first_day_of_month}_to_{current_day}.json", "w") as f:
    f.write(df_costs.to_json(index=False, orient='table'))


# cost summary

def summary_costs(x):
    """Aggregate one groupby slice into a Series of totals.

    Sums the request count, token counts, and dollar costs over the rows of
    *x* and returns them as a Series with a fixed, report-friendly order.
    """
    out_names = ['requests', 'context_tokens', 'generated_tokens',
                 'context_costs', 'generated_costs', 'total_costs']
    src_cols = ['n_requests', 'n_context_tokens_total',
                'n_generated_tokens_total', 'context_costs',
                'generated_costs', 'total_costs']
    totals = {out: x[src].sum() for out, src in zip(out_names, src_cols)}
    return pd.Series(totals, index=out_names)

# Per user, per day, per model snapshot and operation.
per_user_daily_costs = df_costs.groupby(
    ['user', pd.Grouper(key='system_timestamp', freq='D'),
     'snapshot_id', 'operation']
).apply(summary_costs)

# costs by day (organization-wide totals)

daily_costs = df_costs.groupby(
    [pd.Grouper(key='system_timestamp', freq='D')]
).apply(summary_costs)

# costs by user, most expensive first

user_costs = (df_costs.groupby(['user'])
              .apply(summary_costs)
              .sort_values('total_costs', ascending=False))

1 Like