Chunking and storing in a CSV

Below is the chunking code from the OpenAI site. I need to store the chunked values in a CSV that also has a column for the document ID (d1, d2, ...) and a column for the chunk ID (c1, c2, ...) within each document. The chunk counter should reset after each document, i.e. start again at c1 for the next document (I've also sketched what I'm aiming for after the code below).
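Roughly, the CSV I want looks like this (the column names doc_id / chunk_id are just placeholders, not something from the tutorial):

doc_id,chunk_id,text
d1,c1,"first chunk of document 1"
d1,c2,"second chunk of document 1"
d2,c1,"first chunk of document 2"
d2,c2,"second chunk of document 2"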

Code:

import tiktoken

# Tokenizer used to count tokens per sentence (cl100k_base, as in the OpenAI tutorial)
tokenizer = tiktoken.get_encoding("cl100k_base")

max_tokens = 500

# Function to split the text into chunks of a maximum number of tokens
def split_into_many(text, max_tokens=max_tokens):

    # Split the text into sentences
    sentences = text.split('. ')

    # Get the number of tokens for each sentence
    n_tokens = [len(tokenizer.encode(" " + sentence)) for sentence in sentences]

    chunks = []
    tokens_so_far = 0
    chunk = []

    # Loop through the sentences and token counts joined together in a tuple
    for sentence, token in zip(sentences, n_tokens):

        # If the number of tokens so far plus the number of tokens in the current sentence is
        # greater than the max number of tokens, then add the chunk to the list of chunks and
        # reset the chunk and tokens so far
        if tokens_so_far + token > max_tokens:
            chunks.append(". ".join(chunk) + ".")
            chunk = []
            tokens_so_far = 0

        # If the number of tokens in the current sentence is greater than the max number of
        # tokens, go to the next sentence
        if token > max_tokens:
            continue

        # Otherwise, add the sentence to the chunk and add the number of tokens to the total
        chunk.append(sentence)
        tokens_so_far += token + 1

    # Add the last chunk so it isn't dropped
    if chunk:
        chunks.append(". ".join(chunk) + ".")

    return chunks

shortened = []

# Loop through the dataframe
for row in df.iterrows():

    # If the text is None, go to the next row
    if row[1]['text'] is None:
        continue

    # If the number of tokens is greater than the max number of tokens, split the text into chunks
    if row[1]['n_tokens'] > max_tokens:
        shortened += split_into_many(row[1]['text'])

    # Otherwise, add the text to the list of shortened texts
    else:
        shortened.append(row[1]['text'])
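For the CSV part, this is roughly what I have in mind (just a sketch, not something I'm sure is right; doc_id, chunk_id and chunks.csv are placeholder names I made up, and I'm assuming df is the pandas DataFrame from the tutorial with 'text' and 'n_tokens' columns):

import pandas as pd

rows = []
doc_num = 0

# Treat each row of df as one document
for _, row in df.iterrows():

    # Skip rows with no text (they don't get a document id)
    if row['text'] is None:
        continue
    doc_num += 1

    # Chunk the document the same way as above
    if row['n_tokens'] > max_tokens:
        doc_chunks = split_into_many(row['text'])
    else:
        doc_chunks = [row['text']]

    # Chunk numbering starts again at c1 for every new document
    for chunk_num, chunk_text in enumerate(doc_chunks, start=1):
        rows.append({
            'doc_id': f'd{doc_num}',
            'chunk_id': f'c{chunk_num}',
            'text': chunk_text,
        })

# One row per chunk in the output CSV
pd.DataFrame(rows).to_csv('chunks.csv', index=False)

Is this the right way to reset the chunk counter per document, or is there a cleaner way to do it with pandas?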