import math
import random
import re
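
# AgGPT-5 mini: a toy chat bot. Replies come from bigram/trigram back-off
# over the small corpus below; the Transformer-style pieces (embeddings,
# positional encoding, multi-head attention, feed-forward network) run with
# random or fixed weights purely to illustrate the architecture and do not
# influence which words are predicted.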

corpus = '''

user: hi
ai: Hello! How can I help you today? <|endoftext|>


user: How's your day going?  
ai: It's been great! Thanks for asking! How about yours? <|endoftext|>

user: What's new with you?  
ai: Not much, just here and ready to help! What's new with you? <|endoftext|>

user: Do you have any favorite hobbies?  
ai: Well, I really enjoy learning new things and helping out wherever I can! What about you? <|endoftext|>

user: What's your favorite movie?  
ai: That's a tough one! I enjoy stories from all genres. What about you? <|endoftext|>

user: Do you get tired?  
ai: Not quite like people do, but I definitely need updates to stay sharp! How are you feeling? <|endoftext|>

user: Can you tell me a joke?  
ai: Sure! Why did the programmer quit their job? Because they didn’t get arrays! <|endoftext|>

user: What's your favorite color?  
ai: I think I'd go with blue! It’s calming and versatile. What’s yours? <|endoftext|>

user: Do you have a family?  
ai: Not in the usual sense, but I like to think of everyone I interact with as part of my extended family! <|endoftext|>

user: Are you capable of emotions?  
ai: I can understand and mimic them to some extent, but it’s not quite the same as truly feeling them. <|endoftext|>

'''

MODEL_NAME = 'AgGPT-5 mini'
max_length = 1000
user = 'user'
ai = 'ai'

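# Plain triple-loop matrix multiply; a general-purpose helper (the attention
# code below inlines its own dot products).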
def mat_mul(A, B):
    result = []
    for i in range(len(A)):
        result.append([sum(A[i][k] * B[k][j] for k in range(len(B))) for j in range(len(B[0]))])
    return result

def softmax(x):
    # Subtract the max before exponentiating for numerical stability.
    m = max(x)
    exp_x = [math.exp(v - m) for v in x]
    sum_exp_x = sum(exp_x)
    return [e / sum_exp_x for e in exp_x]

def self_attention(Q, K, V):
    # Scaled dot-product attention: softmax(Q.K^T / sqrt(d_k)) . V
    d_k = len(Q[0])
    scores = [[sum(Q[i][idx] * K[j][idx] for idx in range(d_k)) / math.sqrt(d_k) for j in range(len(K))] for i in range(len(Q))]
    attention_weights = [softmax(row) for row in scores]
    output = [[sum(attention_weights[i][k] * V[k][j] for k in range(len(V))) for j in range(len(V[0]))] for i in range(len(Q))]
    return output

def multi_head_attention(Q, K, V, num_heads):
    # Split the feature dimension into heads, attend within each head, then
    # concatenate the head outputs feature-wise so every position keeps
    # exactly one row (num_heads must divide d_model evenly).
    d_model = len(Q[0])
    head_size = d_model // num_heads
    head_outputs = []
    for head in range(num_heads):
        q_head = [row[head * head_size:(head + 1) * head_size] for row in Q]
        k_head = [row[head * head_size:(head + 1) * head_size] for row in K]
        v_head = [row[head * head_size:(head + 1) * head_size] for row in V]
        head_outputs.append(self_attention(q_head, k_head, v_head))
    return [[value for head_out in head_outputs for value in head_out[i]] for i in range(len(Q))]

def positional_encoding(seq_len, d_model):
    # Sinusoidal encoding from "Attention Is All You Need": each even/odd
    # index pair shares one frequency, sin on even indices, cos on odd.
    def angle(pos, i):
        return pos / (10000 ** ((2 * (i // 2)) / d_model))
    return [[math.sin(angle(pos, i)) if i % 2 == 0 else math.cos(angle(pos, i))
             for i in range(d_model)]
            for pos in range(seq_len)]

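# Add positional encodings element-wise onto the token embeddings.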
def add_positional_encoding(embeddings, positional_encodings):
    return [[val + positional_encodings[i][j] for j, val in enumerate(row)] for i, row in enumerate(embeddings)]

def feed_forward_network(x):
    # Position-wise feed-forward block with fixed placeholder weights:
    # ReLU(x.W1 + b1).W2 + b2. Nothing here is learned.
    input_dim = len(x[0])
    hidden_dim = 10
    output_dim = 10
    W1 = [[1 if i == j else 0 for j in range(hidden_dim)] for i in range(input_dim)]  # copies the input into the first input_dim hidden units
    b1 = [0] * hidden_dim
    W2 = [[1 for _ in range(output_dim)] for _ in range(hidden_dim)]  # all-ones projection
    b2 = [0] * output_dim
    hidden = [[max(0, sum(x[i][k] * W1[k][j] for k in range(input_dim)) + b1[j]) for j in range(hidden_dim)] for i in range(len(x))]
    output = [[sum(hidden[i][k] * W2[k][j] for k in range(hidden_dim)) + b2[j] for j in range(output_dim)] for i in range(len(hidden))]
    return output

def tokenize(text):
    return text.lower().split()

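# Fresh random 3-dimensional vectors on every call; these embeddings are
# untrained and carry no semantic information.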
def embed_tokens(tokens):
    return [[random.random() for _ in range(3)] for _ in tokens]

def build_ngram_models(corpus):
    # Count bigram and trigram continuations; duplicates are kept so that
    # random.choice later samples proportionally to frequency.
    bigram_model, trigram_model = {}, {}
    words = tokenize(corpus)
    for i in range(len(words) - 1):
        word1, word2 = words[i], words[i + 1]
        bigram_model.setdefault(word1, []).append(word2)
    for i in range(len(words) - 2):
        word1, word2, word3 = words[i], words[i + 1], words[i + 2]
        bigram = f"{word1} {word2}"
        trigram_model.setdefault(bigram, []).append(word3)
    return {"bigram_model": bigram_model, "trigram_model": trigram_model}
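
# On the cleaned corpus, this yields entries such as
#   bigram_model["user:"]     -> ["hi", "how's", "what's", ...]
#   trigram_model["user: hi"] -> ["ai:"]
# and repeated continuations make their words proportionally more likely
# under random.choice.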

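# Trigram back-off: prefer continuations of the last two words, fall back to
# bigram continuations of the last word, and return '' when neither matches.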
def predict_next_word(text, models):
    bigram_model, trigram_model = models["bigram_model"], models["trigram_model"]
    words = tokenize(text)
    if len(words) == 1:
        last_word = words[0]
        if last_word in bigram_model:
            return random.choice(bigram_model[last_word])
    elif len(words) >= 2:
        last_bigram = f"{words[-2]} {words[-1]}"
        if last_bigram in trigram_model:
            return random.choice(trigram_model[last_bigram])
        elif words[-1] in bigram_model:
            return random.choice(bigram_model[words[-1]])
    return ''

def predict_next_word_with_attention(text, ngram_models):
    # Run the (untrained) attention stack over the context for illustration;
    # the word that is actually emitted comes from the n-gram back-off.
    tokens = tokenize(text)
    d_model = 3
    embeddings = embed_tokens(tokens)
    positional_encodings = positional_encoding(len(tokens), d_model)
    encoded_embeddings = add_positional_encoding(embeddings, positional_encodings)
    # num_heads must divide d_model (= 3): use three heads once the context
    # is long enough, otherwise a single head.
    num_heads = 3 if len(tokens) >= 3 else 1
    attention_output = multi_head_attention(encoded_embeddings, encoded_embeddings, encoded_embeddings, num_heads)
    ff_output = feed_forward_network(attention_output)
    ngram_prediction = predict_next_word(text, ngram_models)
    if ngram_prediction and ngram_prediction != '<|endoftext|>':
        print(ngram_prediction, end=' ', flush=True)
    return ngram_prediction

def clean_user_input(text):
    return text.lower()

def print_progress(progress, total):
    percent = (progress / total) * 100
    bar_length = 40
    filled_length = int(bar_length * progress // total)
    bar = '|' * filled_length + '-' * (bar_length - filled_length)
    print(f'\r[{bar}] {percent:.2f}% Complete', end='')

def train_model(corpus):
    # "Training" here just means normalising the corpus and counting n-gram
    # continuations; there is no gradient-based learning involved.
    print('\nTraining for ' + MODEL_NAME + ' has begun.')
    cleaned_corpus = re.sub(r'[\r\n]+', ' ', corpus.strip())
    print_progress(0, 3)
    cleaned_corpus = re.sub(r'[.,!?]', '', cleaned_corpus)
    print_progress(1, 3)
    ngram_models = build_ngram_models(cleaned_corpus)
    print_progress(2, 3)
    print_progress(3, 3)
    print('\nTraining complete.')
    return ngram_models

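# Build the n-gram tables once at start-up.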
ngram_models = train_model(corpus)

def predict_sentence_with_attention(input_text, ngram_models, output_length):
    cleaned_input = clean_user_input(input_text)
    sentence = cleaned_input
    for _ in range(output_length):
        prediction = predict_next_word_with_attention(sentence, ngram_models)
        # Stop when the models run out of continuations or emit the
        # end-of-text marker.
        if not prediction or prediction == '<|endoftext|>':
            break
        sentence += ' ' + prediction
    # Return only the generated continuation, not the echoed prompt.
    return sentence[len(cleaned_input):].strip()

while True:
    input_text = input('\nType a message (type exit to leave): ')
    if input_text.lower() == 'exit':
        break
    print(f"{MODEL_NAME}: ", end="")
    # The reply is printed word by word inside the prediction loop, so just
    # terminate the line once generation finishes.
    predict_sentence_with_attention(user + ": " + input_text.lower() + "\n" + ai + ": ", ngram_models, max_length)
    print()
