import json
from collections import deque
import os
import math
import re

# --- Config ---
ROOT_URL = "https://chatgtp.sixfold.nl/project/logs/chat/"
OUTPUT_DIR = "."
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load full JSON export
with open("conversations.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# Display available chat titles, numbered from 1 to match what the user types
print("Available chats:")
for i, chat in enumerate(data, start=1):
    # A chat without a title would otherwise be listed as "None"
    print(f"{i}. {chat.get('title') or 'Untitled'}")

# Turn the comma-separated, 1-based answer into 0-based indices into `data`.
# Invalid entries are reported and skipped instead of crashing the script or,
# worse, wrapping around: "0" or a negative number used to become a negative
# index and silently select a chat from the end of the list.
selection = input("Enter the numbers of the chats to combine, separated by commas: ")
selected_indices = []
for token in selection.split(","):
    token = token.strip()
    if not token:
        continue  # tolerate trailing or doubled commas
    try:
        number = int(token)
    except ValueError:
        print(f"Ignoring '{token}': not a number.")
        continue
    if not 1 <= number <= len(data):
        print(f"Ignoring {number}: choose a number between 1 and {len(data)}.")
        continue
    # Selecting the same chat twice would only duplicate its messages
    if number - 1 not in selected_indices:
        selected_indices.append(number - 1)

if not selected_indices:
    raise SystemExit("No valid chats selected; nothing to do.")

# Ask how many parts to split into; blank or unparsable input means 1
split_parts_input = input("Split into how many equal parts? (Enter 1 for single file): ")
try:
    split_parts = int(split_parts_input) if split_parts_input.strip() else 1
except ValueError:
    print(f"'{split_parts_input.strip()}' is not a whole number; writing a single file.")
    split_parts = 1
# Zero or negative answers are clamped to a single file
split_parts = max(1, split_parts)

# Collect messages
all_messages = []

for idx in selected_indices:
    chat = data[idx]
    mapping = chat.get("mapping") or {}
    # A missing or null title would otherwise end up as "None" in the output
    chat_title = chat.get("title") or "Untitled"

    # Start from the fixed "client-created-root" node when the mapping has
    # one. Otherwise fall back to every node whose parent is not part of the
    # mapping, so a chat whose root carries a different id is not silently
    # skipped.
    if "client-created-root" in mapping:
        roots = ["client-created-root"]
    else:
        roots = [
            node_id
            for node_id, node in mapping.items()
            if (node or {}).get("parent") not in mapping
        ]

    # Breadth-first walk over the node tree. `seen` makes sure a node that is
    # reachable more than once is emitted a single time and that a malformed
    # mapping containing a cycle cannot loop forever.
    queue = deque(roots)
    seen = set(roots)
    while queue:
        node_id = queue.popleft()
        node = mapping.get(node_id) or {}
        msg = node.get("message") or {}
        parts = (msg.get("content") or {}).get("parts")
        if parts:
            # Only string parts can be joined; anything else (e.g. a dict)
            # would make str.join raise TypeError, so it is left out.
            text_parts = [part for part in parts if isinstance(part, str)]
            all_messages.append({
                "chat_title": chat_title,
                # Prefer the creation time, fall back to the update time;
                # stays None when the message has neither.
                "timestamp": msg.get("create_time") or msg.get("update_time"),
                "author": (msg.get("author") or {}).get("role") or "unknown",
                "content": " ".join(text_parts).strip()
            })
        for child_id in node.get("children") or []:
            if child_id not in seen:
                seen.add(child_id)
                queue.append(child_id)

# Give every message without a timestamp a running placeholder (0, 1, 2, ...)
# in collection order, so that each entry has a sortable key.
# NOTE(review): these small integers presumably sort ahead of real epoch
# timestamps, moving such messages to the top — confirm that is intended.
untimed = [entry for entry in all_messages if entry["timestamp"] is None]
for placeholder, entry in enumerate(untimed):
    entry["timestamp"] = placeholder

# Put the combined messages in chronological order (in place; the sort is
# stable, so entries with equal timestamps keep their collection order)
all_messages.sort(key=lambda entry: entry["timestamp"])

# Chunk size: number of messages per output file, rounded up so that
# `split_parts` files are enough to hold everything
total_messages = len(all_messages)
messages_per_file = math.ceil(total_messages / split_parts)

# Clean first chat title to use as filename
def sanitize_filename(name):
    """Return *name* reduced to characters that are safe in a file name.

    Every character other than an ASCII letter, a digit, ``_`` or ``-`` is
    replaced by ``_``.

    ``None`` and the empty string yield ``"untitled"``: ``None`` would
    otherwise raise ``TypeError`` inside ``re.sub``, and an empty result
    would leave callers with a bare extension such as ``.md``.
    """
    if name is None or name == "":
        return "untitled"
    # str() lets a non-string title (e.g. a number) through instead of raising
    return re.sub(r'[^a-zA-Z0-9_-]', '_', str(name))

# Name the output after the chat of the earliest message; fall back to a
# generic name when nothing was collected or that chat has no usable title.
first_title = all_messages[0]["chat_title"] if all_messages else None
first_chat_title = sanitize_filename(first_title) if first_title else "combined_chats"

# Never create more parts than there are messages (the surplus files would
# just be empty), but always write at least one file.
part_count = max(1, min(split_parts, len(all_messages)))
if part_count < split_parts:
    print(
        f"Only {len(all_messages)} message(s) available; "
        f"writing {part_count} file(s) instead of {split_parts}."
    )

# Balanced split: every part gets `base_size` messages and the first `extra`
# parts get one more, so sizes differ by at most one. Fixed-size chunking
# could leave trailing files empty (e.g. 9 messages in 4 parts -> 3/3/3/0).
base_size, extra = divmod(len(all_messages), part_count)

output_files = []
start = 0

for i in range(part_count):
    end = start + base_size + (1 if i < extra else 0)
    part_messages = all_messages[start:end]
    start = end

    # Filename logic: a single file is named after the chat alone; several
    # files get a zero-padded, 1-based part suffix
    if part_count == 1:
        filename = f"{first_chat_title}.md"
    else:
        filename = f"{first_chat_title}_{i+1:03}.md"

    filepath = os.path.join(OUTPUT_DIR, filename)
    with open(filepath, "w", encoding="utf-8") as f:
        # One line per message: [timestamp] chat title (author): content
        f.writelines(
            f"[{m['timestamp']}] {m['chat_title']} ({m['author']}): {m['content']}\n"
            for m in part_messages
        )
    output_files.append(filename)

# Manifest always points to the files created: one open_url(...) line per
# part, built from ROOT_URL plus the bare file name
MANIFEST_PATH = os.path.join(OUTPUT_DIR, f"{first_chat_title}_manifest.md")
with open(MANIFEST_PATH, "w", encoding="utf-8") as f:
    for filename in output_files:
        f.write(f'open_url("{ROOT_URL}{filename}")\n')

print("Combined chats saved to files:")
for f_name in output_files:
    print(f"  {f_name}")
print(f"\nManifest file: {MANIFEST_PATH}")
