import re
import random
from collections import defaultdict

Modelname = "AG Omni"  # Display name shown in the chat prompt
nsize = 25             # n-gram order: number of context words used to predict the next word

def load_data(file_path):
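    """Read the training corpus, strip the special paragraph/line tokens, and return one flat string."""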
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read().lower()
    text = text.replace('<|startofparagraph|>', '')
    text = text.replace('<|endofparagraph|>', '')
    lines = text.splitlines()
    processed_lines = []
    for line in lines:
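        # Drop empty line-marker pairs first, then strip any remaining line tokens.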
        line = re.sub(r"<\|startofline\|>\s*<\|endofline\|>", "", line)
        line = line.replace("<|startofline|>", "").replace("<|endofline|>", "")
        processed_lines.append(line)
    return " ".join(processed_lines)

def build_ngram_model(text, n=25):
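    """Map every n-word window in the corpus to the list of words observed immediately after it."""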
    words = text.split()
    model = defaultdict(list)
    for i in range(len(words) - n):
        gram = tuple(words[i:i + n])
        next_word = words[i + n]
        model[gram].append(next_word)
    return model

def generate_response(model, user_input, n=25, max_length=100):
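    """Generate up to max_length words, seeding from the end of the user's input when possible."""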
    words = user_input.lower().split()
    current_gram = None
    # Model keys are n-word tuples, so only the last n words of the input can possibly match.
    if len(words) >= n:
        seed = tuple(words[-n:])
        if seed in model:
            current_gram = seed
    if current_gram is None:
        # No matching context in the training data: start from a random gram instead.
        current_gram = random.choice(list(model.keys()))
    result = list(current_gram)
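    # Repeatedly sample a continuation and slide the n-word context window forward.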
    for _ in range(max_length - len(current_gram)):
        possible_next_words = model.get(current_gram)
        if not possible_next_words:
            break
        next_word = random.choice(possible_next_words)
        result.append(next_word)
        current_gram = tuple(result[-n:])
    output = " ".join(result)
    # Safety net: remove any line tokens that survived preprocessing.
    output = re.sub(r"<\|startofline\|>|<\|endofline\|>", "", output)
    return output

def chat(model, n=25):
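    """Simple REPL: read user input, generate a reply from the n-gram model, repeat until 'exit'."""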
    print(f"{Modelname}: Hello! Type 'exit' to end the chat.")
    while True:
        user_input = input("You: ")
        if user_input.strip().lower() == "exit":
            print(f"{Modelname}: Goodbye!")
            break
        response = generate_response(model, user_input, n)
        print(f"{Modelname}:", response)

if __name__ == "__main__":
    training_data_path = 'training_data.txt'
    text_data = load_data(training_data_path)
    ngram_model = build_ngram_model(text_data, nsize)
    chat(ngram_model, nsize)