import os
from typing import Optional


def chunk_file(
    input_file: str,
    output_dir: Optional[str] = None,
    start_num: int = 1,
    padding: int = 2,
    *,
    delimiter: str = "---",
    encoding: str = "utf-8",
) -> int:
    """Split a text file on a delimiter and write each piece to its own file.

    The input is read fully into memory, split on every occurrence of
    ``delimiter``, and each piece is stripped of surrounding whitespace.
    Pieces that are empty after stripping are skipped and do not consume a
    number. Output files are named ``chunk_<n>.md``, where ``<n>`` starts at
    ``start_num`` and is zero-padded to ``padding`` digits. Existing files
    with the same name are overwritten.

    Args:
        input_file: Path to the input file.
        output_dir: Directory to save chunk files; created (including
            parents) if missing. Defaults to the current directory.
        start_num: Number used for the first chunk file. Defaults to 1.
        padding: Minimum number of digits in the file number. Defaults to 2.
        delimiter: Text that separates chunks. It is matched anywhere in the
            content, not only on lines of its own. Defaults to ``"---"``.
        encoding: Text encoding used for reading the input and writing the
            chunks. Defaults to ``"utf-8"``.

    Returns:
        The number of chunk files written.

    Raises:
        ValueError: If ``delimiter`` is empty.
        OSError: If the input cannot be read, or the output directory or a
            chunk file cannot be created.
    """
    # Fail before touching the filesystem; str.split("") would raise anyway.
    if not delimiter:
        raise ValueError("delimiter must be a non-empty string")

    if output_dir:
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(output_dir, exist_ok=True)

    # Explicit encoding keeps the result independent of the platform locale.
    with open(input_file, encoding=encoding) as f:
        content = f.read()

    written = 0
    for raw_chunk in content.split(delimiter):
        chunk = raw_chunk.strip()
        if not chunk:  # Skip empty chunks (e.g. consecutive delimiters)
            continue

        # Output name carries the padded incremental number.
        file_name = f"chunk_{start_num + written:0{padding}d}.md"
        outfile_path = os.path.join(output_dir, file_name) if output_dir else file_name

        with open(outfile_path, "w", encoding=encoding) as outfile:
            outfile.write(chunk)
        written += 1

    return written


# Example usage
if __name__ == "__main__":
    # Alternative input:
    # input_file = "/home/gra/PycharmProjects/librarian_vspace/examples/chunks/knowledge_chunks_detailed.md"
    input_file = "/home/gra/PycharmProjects/librarian_vspace/examples/chunks/knowledge_chunks_1500.md"

    # You can specify an output directory or omit it to use the current directory.
    # NOTE(review): this is an absolute path at the filesystem root, unlike the
    # input path above -- confirm it is not meant to be project-relative.
    output_dir = "/examples/chunks/chunk_md_x"

    chunk_file(input_file, output_dir)