import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

# Load the 'resultados1' sheet of the Excel workbook into a DataFrame
df = pd.read_excel('Bueno.xlsx', sheet_name='resultados1')

# Print the available column names as a sanity check
print(df.columns)
# Columns used by the rest of the script: tf, tw, Flecha_Media

# Quick look at the summary statistics of the columns of interest
print(df[['tf', 'tw', 'Flecha_Media']].describe())

def pareto_frontier(X, Y, maximize_X=True, minimize_Y=True):
    """Return a boolean mask selecting the points on the Pareto front.

    Point ``i`` is the pair ``(X[i], Y[i])``. A point is dropped when some
    other point is at least as good on both objectives and strictly better
    on at least one of them. Exact duplicates of a non-dominated point are
    all kept.

    Args:
        X: 1-D array-like (list, NumPy array, pandas Series, ...) holding the
            first objective.
        Y: 1-D array-like holding the second objective, same length as ``X``.
        maximize_X: if True, larger ``X`` is better; otherwise smaller ``X``
            is better.
        minimize_Y: if True, smaller ``Y`` is better; otherwise larger ``Y``
            is better.

    Returns:
        A NumPy boolean array with one entry per point, True for the points
        that belong to the Pareto front.

    Raises:
        ValueError: if ``X`` and ``Y`` do not have the same length.
    """
    # Convert to float arrays so that lists are accepted, unsigned integers
    # do not wrap around when negated, and the caller's data is never touched.
    gain_x = np.asarray(X, dtype=float).ravel()
    gain_y = np.asarray(Y, dtype=float).ravel()

    # Orient both objectives so that "larger is better"; the dominance test
    # below removes the points that are smaller than the current one.
    if not maximize_X:
        gain_x = -gain_x
    if minimize_Y:
        gain_y = -gain_y

    gains = np.column_stack((gain_x, gain_y))
    is_efficient = np.ones(gains.shape[0], dtype=bool)
    for i, current in enumerate(gains):
        if not is_efficient[i]:
            continue
        survivors = gains[is_efficient]
        # A survivor is dominated by the current point when it is <= on every
        # objective and < on at least one. The current point never satisfies
        # the strict part against itself, so it always stays in the mask.
        dominated = (
            np.all(survivors <= current, axis=1)
            & np.any(survivors < current, axis=1)
        )
        is_efficient[is_efficient] = ~dominated
    return is_efficient

# Only one response (Flecha_Media) is modelled, so every row is kept
df_pareto = df.copy()

print(f"Total puntos: {len(df)}")

# 3-D point cloud of Flecha_Media over the (tw, tf) plane
# NOTE(review): Axes3D is not referenced by name; presumably the import is
# kept so that older Matplotlib releases register the '3d' projection.
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(1, 1, 1, projection='3d')
scatter = ax.scatter(
    df_pareto['tw'],
    df_pareto['tf'],
    df_pareto['Flecha_Media'],
    c=df_pareto['Flecha_Media'],  # colour encodes the response as well
    cmap='viridis',
    s=30,
    alpha=0.6,
)
ax.set(
    xlabel='tw',
    ylabel='tf',
    zlabel='Flecha Media',
    title='Nube de puntos: Flecha_Media = f(tw, tf)',
)
plt.colorbar(scatter, ax=ax, label='Flecha Media')
plt.show()

# Predictor matrix (tw, tf) and response vector (Flecha_Media)
X = df_pareto[['tw', 'tf']].values
y = df_pareto['Flecha_Media'].values

# Degree-2 polynomial expansion followed by ordinary least squares
# (degree 3 is worth trying as well)
model = make_pipeline(
    PolynomialFeatures(degree=2, include_bias=False),
    LinearRegression(),
).fit(X, y)

# Goodness of fit, measured on the same data the model was fitted on
y_pred = model.predict(X)
r2 = model.score(X, y)
print(f"R² en Flecha_Media (Modelo Polinómico): {r2:.4f}")

# Pull the fitted steps out of the pipeline to expose the closed-form terms
poly_step = model.named_steps['polynomialfeatures']
linear_step = model.named_steps['linearregression']
feature_names = poly_step.get_feature_names_out(['tw', 'tf'])
coef_df = pd.DataFrame(
    {
        'feature': feature_names,
        'coef': linear_step.coef_,
    }
)
print("\nCoeficientes del modelo polinómico:")
print(coef_df)

# The intercept is kept separately because include_bias=False above
intercept = linear_step.intercept_
print(f"Intercept: {intercept:.6f}")

# Random forest fitted on the same data, scored in-sample as well
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X, y)
r2_rf = rf.score(X, y)
print(f"\nR² Random Forest: {r2_rf:.4f}")

from gplearn.genetic import SymbolicRegressor

# Hyper-parameters of the symbolic-regression (genetic programming) search
gp_settings = {
    'population_size': 2000,
    'generations': 20,
    'stopping_criteria': 0.01,
    'p_crossover': 0.7,
    'p_subtree_mutation': 0.1,
    'p_hoist_mutation': 0.05,
    'p_point_mutation': 0.1,
    'max_samples': 0.9,
    'verbose': 1,
    'parsimony_coefficient': 0.01,
    'random_state': 0,
}
est_gp = SymbolicRegressor(**gp_settings)
est_gp.fit(X, y)

# Banner announcing the symbolic expression that is printed next
banner = "="*80
print(
    "\n" + banner,
    "EXPRESIÓN SIMBÓLICA ENCONTRADA (Regresión Genética):",
    banner,
    sep="\n",
)

# Render a prefix-notation expression as an indented operator tree
def format_expression(expr_str, indent=0):
    """Format a prefix-notation expression as an indented text tree.

    An expression of the form ``name(arg1, arg2, ...)`` becomes a line
    ``(symbol)`` followed by one sub-tree per argument, indented two extra
    spaces. ``add``, ``sub``, ``mul`` and ``div`` are shown with their
    arithmetic symbols; any other function name is shown as written.
    Anything that is not a call (a variable or a number) becomes a single
    ``├─ value`` leaf line.

    A node is recognised by its shape (a name followed by a parenthesised
    argument list), not by the first letter of the name, so every function
    name is expanded and a variable whose name happens to start with
    'a', 's', 'm' or 'd' is still treated as a leaf.

    Args:
        expr_str: the expression text, e.g. ``"add(X0, mul(X1, 0.5))"``.
        indent: number of spaces placed in front of the root line.

    Returns:
        The tree as a string, one node per line, each line terminated by a
        newline. An empty (or whitespace-only) expression yields ``""``.
    """
    expr_str = expr_str.strip()
    if not expr_str:
        # Nothing to draw; also covers the empty argument list of "f()".
        return ""

    open_idx = expr_str.find('(')
    if open_idx <= 0 or not expr_str.endswith(')'):
        # Variable or numeric constant
        return " " * indent + f"├─ {expr_str}\n"

    operation = expr_str[:open_idx].strip()
    inner = expr_str[open_idx + 1:-1]  # drop the outer "(" and ")"

    # Split the argument list on commas that are not nested inside a call
    args = []
    depth = 0
    start = 0
    for pos, ch in enumerate(inner):
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        elif ch == ',' and depth == 0:
            args.append(inner[start:pos])
            start = pos + 1
    args.append(inner[start:])

    # Map the arithmetic operations to symbols; other names pass through
    op_map = {'add': '+', 'sub': '−', 'mul': '×', 'div': '÷'}
    op_symbol = op_map.get(operation, operation)

    pieces = [" " * indent + f"({op_symbol})\n"]
    pieces.extend(format_expression(arg, indent + 2) for arg in args)
    return "".join(pieces)

# Text form of the fitted program, reused for the tree and LaTeX renderings
expr_str = str(est_gp._program)

expr_tree_formatted = format_expression(expr_str)
print("Árbol de operaciones:")
print(expr_tree_formatted)

# R² of the symbolic regression, computed by hand as 1 - SS_res / SS_tot
y_pred_gp = est_gp.predict(X)
ss_res = np.sum((y - y_pred_gp)**2)
ss_tot = np.sum((y - np.mean(y))**2)
r2_gp = 1 - ss_res / ss_tot
print(f"R² Regresión Simbólica: {r2_gp:.4f}\n")

def prefix_to_latex(expr_str):
    """Convert a prefix-notation expression such as ``add(a, b)`` to LaTeX.

    Binary ``add``, ``sub``, ``mul`` and ``div`` calls are rendered as
    ``(a + b)``, ``(a - b)``, ``{a} \\times {b}`` and ``\\frac{a}{b}``.
    Any other call (or one of those four with an unexpected number of
    arguments) is rendered as ``\\operatorname{name}(args)`` so the function
    name is never lost. Text that is not a call (a variable or a number)
    is returned unchanged after stripping surrounding whitespace.

    A call is recognised by its shape (a name followed by a parenthesised
    argument list), not by the first letter of the name.

    Args:
        expr_str: the expression text, e.g. ``"div(add(X0, 1.0), X1)"``.

    Returns:
        The LaTeX source for the expression as a string.
    """
    expr_str = expr_str.strip()

    open_idx = expr_str.find('(')
    if open_idx <= 0 or not expr_str.endswith(')'):
        # Variable, numeric constant or empty text: nothing to convert
        return expr_str

    operation = expr_str[:open_idx].strip()
    inner = expr_str[open_idx + 1:-1]  # drop the outer "(" and ")"

    # Split the argument list on commas that are not nested inside a call
    args = []
    depth = 0
    start = 0
    for pos, ch in enumerate(inner):
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        elif ch == ',' and depth == 0:
            args.append(inner[start:pos])
            start = pos + 1
    args.append(inner[start:])

    converted_args = [prefix_to_latex(arg) for arg in args]

    binary_templates = {
        'add': "({} + {})",
        'sub': "({} - {})",
        'mul': "{{{0}}} \\times {{{1}}}",
        'div': "\\frac{{{0}}}{{{1}}}",
    }
    template = binary_templates.get(operation)
    if template is not None and len(converted_args) == 2:
        return template.format(*converted_args)

    # Unknown function or unexpected arity: keep the name visible
    return "\\operatorname{{{}}}({})".format(operation, ', '.join(converted_args))

expr_latex = prefix_to_latex(expr_str)
print("\nExpresión en LaTeX:")
print(expr_latex)

# Save every fitted expression to a text file for later reference.
# The report is assembled in memory and written with a single call.
with open('expresion_simbolica.txt', 'w', encoding='utf-8') as f:
    heavy_rule = "="*80 + "\n"
    light_rule = "-"*80 + "\n"

    # Header and variable legend
    report = [
        "EXPRESIONES OBTENIDAS - FLECHA_MEDIA = f(tw, tf)\n",
        heavy_rule,
        "VARIABLES:\n",
        "X0 = tw (espesor del alma / web thickness)\n",
        "X1 = tf (espesor del ala / flange thickness)\n",
        heavy_rule + "\n",
    ]

    # Section 1: polynomial model, written as intercept plus signed terms
    report += [
        "1. MODELO POLINÓMICO (R² = {:.4f})\n".format(r2),
        light_rule,
        "Ecuación en LaTeX:\n\n",
        "Flecha_{{Media}} = {:.6f}".format(intercept),
    ]
    for feat, coef in zip(feature_names, linear_step.coef_):
        sign = "+" if coef >= 0 else "-"
        report.append(" {0} {1:.6f} \\cdot {2}".format(sign, abs(coef), feat))
    report += [
        "\n\n",
        "Coeficientes:\n",
        coef_df.to_string(),
        "\n\n",
    ]

    # Section 2: random forest, described by its feature importances
    report += [
        "2. RANDOM FOREST (R² = {:.4f})\n".format(r2_rf),
        light_rule,
        "El modelo Random Forest es un ensemble de 100 árboles de decisión.\n",
        "No tiene una ecuación analítica cerrada.\n",
        "Importancias de características (feature importance):\n",
    ]
    importances = rf.feature_importances_
    for name, imp in zip(['tw', 'tf'], importances):
        report.append("  - {0}: {1:.4f}\n".format(name, imp))
    report.append("\n")

    # Section 3: symbolic regression as tree, LaTeX and raw program text
    report += [
        "3. REGRESIÓN SIMBÓLICA / GENETIC PROGRAMMING (R² = {:.4f})\n".format(r2_gp),
        light_rule,
        "Árbol de operaciones:\n",
        expr_tree_formatted,
        "\nEcuación en LaTeX:\n\n",
        expr_latex + "\n\n",
        "Notación de árbol (formato original):\n",
        str(est_gp._program) + "\n\n",
    ]

    # Closing R² summary
    report += [
        heavy_rule,
        "RESUMEN R²:\n",
        "  Polinómico:     {:.4f}\n".format(r2),
        "  Random Forest:  {:.4f}\n".format(r2_rf),
        "  Simbólico:      {:.4f}\n".format(r2_gp),
        heavy_rule,
    ]

    f.write("".join(report))

print("\n✓ Expresión guardada en 'expresion_simbolica.txt'")

# Side-by-side R² comparison of the three fitted models
summary_lines = [
    "="*80,
    "RESUMEN DE MODELOS:",
    "="*80,
    f"R² Modelo Polinómico:     {r2:.4f}",
    f"R² Random Forest:         {r2_rf:.4f}",
    f"R² Regresión Simbólica:   {r2_gp:.4f}",
    "="*80 + "\n",
]
print("\n".join(summary_lines))

def _draw_surface_panel(ax, surface, title):
    """Draw the observed points and one fitted surface on a 3-D axes."""
    ax.scatter(df_pareto['tw'], df_pareto['tf'], df_pareto['Flecha_Media'],
               c='red', s=30, alpha=0.6, label='Datos reales')
    ax.plot_surface(tw_mesh, tf_mesh, surface, alpha=0.5, cmap='coolwarm')
    ax.set_xlabel('tw')
    ax.set_ylabel('tf')
    ax.set_zlabel('Flecha Media')
    ax.set_title(title)
    ax.legend()


def _draw_residual_panel(ax, predicted, residuals, title):
    """Draw a residuals-versus-predictions scatter with a zero reference line."""
    ax.scatter(predicted, residuals, alpha=0.5)
    ax.axhline(y=0, color='r', linestyle='--')
    ax.set_xlabel('Predicciones')
    ax.set_ylabel('Residuos')
    ax.set_title(title)
    ax.grid(True, alpha=0.3)


# 30 x 30 grid spanning the observed tw and tf ranges
tw_min, tw_max = df_pareto['tw'].min(), df_pareto['tw'].max()
tf_min, tf_max = df_pareto['tf'].min(), df_pareto['tf'].max()

tw_range = np.linspace(tw_min, tw_max, 30)
tf_range = np.linspace(tf_min, tf_max, 30)
tw_mesh, tf_mesh = np.meshgrid(tw_range, tf_range)
X_plot = np.column_stack([tw_mesh.ravel(), tf_mesh.ravel()])

# Evaluate each model on the grid and fold the result back to grid shape
flecha_pred = model.predict(X_plot)
flecha_mesh = flecha_pred.reshape(tw_mesh.shape)

flecha_pred_rf = rf.predict(X_plot)
flecha_mesh_rf = flecha_pred_rf.reshape(tw_mesh.shape)

flecha_pred_gp = est_gp.predict(X_plot)
flecha_mesh_gp = flecha_pred_gp.reshape(tw_mesh.shape)

fig = plt.figure(figsize=(16, 10))

# Top row: observed data plus the fitted surface of each model
ax1 = fig.add_subplot(2, 3, 1, projection='3d')
_draw_surface_panel(ax1, flecha_mesh, f'Modelo Polinómico (R²={r2:.4f})')

ax2 = fig.add_subplot(2, 3, 2, projection='3d')
_draw_surface_panel(ax2, flecha_mesh_rf, f'Random Forest (R²={r2_rf:.4f})')

ax3 = fig.add_subplot(2, 3, 3, projection='3d')
_draw_surface_panel(ax3, flecha_mesh_gp, f'Regresión Simbólica (R²={r2_gp:.4f})')

# Bottom row: residuals of each model on the fitted data
ax4 = fig.add_subplot(2, 3, 4)
residuos_poly = y - y_pred
_draw_residual_panel(ax4, y_pred, residuos_poly, 'Residuos - Modelo Polinómico')

ax5 = fig.add_subplot(2, 3, 5)
y_pred_rf = rf.predict(X)  # computed once and used for both axes
residuos_rf = y - y_pred_rf
_draw_residual_panel(ax5, y_pred_rf, residuos_rf, 'Residuos - Random Forest')

ax6 = fig.add_subplot(2, 3, 6)
residuos_gp = y - y_pred_gp
_draw_residual_panel(ax6, y_pred_gp, residuos_gp, 'Residuos - Regresión Simbólica')

plt.tight_layout()
plt.show()