|
|
@@ -0,0 +1,372 @@
|
|
|
+import pandas as pd
|
|
|
+import numpy as np
|
|
|
+import matplotlib.pyplot as plt
|
|
|
+from sklearn.ensemble import RandomForestRegressor
|
|
|
+from sklearn.preprocessing import PolynomialFeatures
|
|
|
+from sklearn.linear_model import LinearRegression
|
|
|
+from sklearn.pipeline import make_pipeline
|
|
|
+
|
|
|
# Load the simulation results (sheet 'resultados1' of Bueno.xlsx).
df = pd.read_excel('Bueno.xlsx', sheet_name='resultados1')

# Sanity check: list the available columns.
print(df.columns)
# The columns used below are: tf, tw, Flecha_Media.

# Quick exploration of the three working columns.
print(df[['tf', 'tw', 'Flecha_Media']].describe())
|
|
|
def pareto_frontier(X, Y, maximize_X=True, minimize_Y=True):
    """Return a boolean mask selecting the points on the Pareto front.

    The test runs in minimisation space: objectives to be maximised are
    negated first, then a point survives only if no other point is <= in
    every objective and < in at least one.

    Parameters
    ----------
    X : array-like
        First objective (e.g. tw).
    Y : array-like
        Second objective (e.g. tf).
    maximize_X : bool
        If True, X is maximised (internally converted to -X).
    minimize_Y : bool
        If True, Y is minimised (left as-is); if False it is negated.

    Returns
    -------
    numpy.ndarray of bool
        True for non-dominated (Pareto-efficient) points.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    # Convert everything to "smaller is better".
    # (The original negated in the opposite cases, contradicting the
    # documented semantics, and also removed the dominating points
    # instead of the dominated ones.)
    if maximize_X:
        X = -X
    if not minimize_Y:
        Y = -Y

    costs = np.vstack((X, Y)).T
    is_efficient = np.ones(costs.shape[0], dtype=bool)
    for i, c in enumerate(costs):
        if is_efficient[i]:
            # Keep only points that beat c in at least one objective;
            # everything dominated by c drops out of the efficient set.
            is_efficient[is_efficient] = np.any(costs[is_efficient] < c, axis=1)
            is_efficient[i] = True  # c removed itself above; restore it
    return is_efficient
|
|
|
+
|
|
|
# With a single objective (Flecha_Media) there is no Pareto filtering to
# do here: keep every sample point.
df_pareto = df.copy()

print(f"Total puntos: {len(df)}")

# 3-D scatter of the raw cloud: Flecha_Media as a function of (tw, tf).
from mpl_toolkits.mplot3d import Axes3D

fig = plt.figure(figsize=(12, 8))
ax = fig.add_subplot(111, projection='3d')
scatter = ax.scatter(df_pareto['tw'], df_pareto['tf'], df_pareto['Flecha_Media'],
                     c=df_pareto['Flecha_Media'], cmap='viridis', s=30, alpha=0.6)
ax.set_xlabel('tw')
ax.set_ylabel('tf')
ax.set_zlabel('Flecha Media')
ax.set_title('Nube de puntos: Flecha_Media = f(tw, tf)')
plt.colorbar(scatter, ax=ax, label='Flecha Media')
plt.show()

# Feature matrix (tw, tf) and target vector for all the regressions below.
X = df_pareto[['tw', 'tf']].values
y = df_pareto['Flecha_Media'].values
|
|
|
+
|
|
|
# Degree-2 polynomial regression on (tw, tf); degree 3 is worth trying too.
# NOTE: make_pipeline's auto-generated step names ('polynomialfeatures',
# 'linearregression') are relied upon further down — keep them.
model = make_pipeline(PolynomialFeatures(degree=2, include_bias=False),
                      LinearRegression())
model.fit(X, y)

# In-sample fit quality (R² on the training data).
y_pred = model.predict(X)
r2 = model.score(X, y)
print(f"R² en Flecha_Media (Modelo Polinómico): {r2:.4f}")
|
|
|
+
|
|
|
# Pull the fitted steps out of the pipeline to reconstruct the explicit
# polynomial expression (coefficients per feature, plus the intercept).
linear_step = model.named_steps['linearregression']
poly_step = model.named_steps['polynomialfeatures']
feature_names = poly_step.get_feature_names_out(['tw', 'tf'])
coef_df = pd.DataFrame({'feature': feature_names, 'coef': linear_step.coef_})
print("\nCoeficientes del modelo polinómico:")
print(coef_df)

# The intercept completes the equation (reused when writing the report).
intercept = linear_step.intercept_
print(f"Intercept: {intercept:.6f}")
|
|
|
+
|
|
|
# Non-parametric baseline: a 100-tree random forest with a fixed seed for
# reproducibility. fit() returns the estimator itself, so the call chains.
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(X, y)
r2_rf = rf.score(X, y)
print(f"\nR² Random Forest: {r2_rf:.4f}")
|
|
|
+
|
|
|
# Third-party dependency: gplearn (genetic-programming regression).
from gplearn.genetic import SymbolicRegressor

# Symbolic regression: evolves closed-form expressions over (X0, X1).
# Crossover/mutation probabilities sum to 0.95; parsimony_coefficient
# penalises long programs; verbose=1 logs each generation.
est_gp = SymbolicRegressor(population_size=2000,
                           generations=20,
                           stopping_criteria=0.01,
                           p_crossover=0.7,
                           p_subtree_mutation=0.1,
                           p_hoist_mutation=0.05,
                           p_point_mutation=0.1,
                           max_samples=0.9,
                           verbose=1,
                           parsimony_coefficient=0.01,
                           random_state=0)
est_gp.fit(X, y)
|
|
|
+
|
|
|
# Banner for the readable dump of the evolved expression.
print("\n" + "="*80)
print("EXPRESIÓN SIMBÓLICA ENCONTRADA (Regresión Genética):")
print("="*80)
|
|
|
+
|
|
|
+# Función para formatear el árbol de forma legible
|
|
|
def format_expression(expr_str, indent=0):
    """Recursively render a gplearn prefix expression as an indented tree.

    Parameters
    ----------
    expr_str : str
        Expression in prefix notation, e.g. "add(X0, mul(X1, 0.5))".
        Operators are add/sub/mul/div; leaves are variables or constants.
    indent : int
        Current indentation (spaces); grows by 2 per tree depth.

    Returns
    -------
    str
        Multi-line tree: operator nodes as "(symbol)", leaves as "├─ value".
    """
    expr_str = expr_str.strip()

    # Leaf node: variables (X0, X1) and numeric constants never start
    # with an operator initial (a/s/m/d).
    if expr_str[0] not in ('a', 's', 'm', 'd'):
        return " " * indent + f"├─ {expr_str}\n"

    # Operator node: the name runs up to the first '('.
    op_end = expr_str.index('(')
    operation = expr_str[:op_end]
    inner = expr_str[op_end + 1:-1]  # strip the outer parentheses

    # Split the argument list on top-level commas only.
    args = []
    current_arg = ""
    depth = 0
    for ch in inner:
        if ch == ',' and depth == 0:
            args.append(current_arg)
            current_arg = ""
            continue
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        current_arg += ch
    args.append(current_arg)

    # Render the operator with a math symbol, then its children two deeper.
    op_map = {'add': '+', 'sub': '−', 'mul': '×', 'div': '÷'}
    result = " " * indent + f"({op_map.get(operation, operation)})\n"
    for arg in args:
        result += format_expression(arg, indent + 2)
    return result
|
|
|
+
|
|
|
# Render the fitted program as a tree. NOTE(review): _program is a private
# gplearn attribute holding the best evolved program — confirm on upgrade.
expr_tree_formatted = format_expression(str(est_gp._program))
print("Árbol de operaciones:")
print(expr_tree_formatted)

# R² of the symbolic model, computed by hand: 1 - SS_res / SS_tot.
y_pred_gp = est_gp.predict(X)
r2_gp = 1 - np.sum((y - y_pred_gp)**2) / np.sum((y - np.mean(y))**2)
print(f"R² Regresión Simbólica: {r2_gp:.4f}\n")

# Raw prefix-notation string of the evolved program, converted to LaTeX below.
expr_str = str(est_gp._program)
|
|
|
+
|
|
|
def prefix_to_latex(expr_str):
    """Convert a prefix expression (e.g. "add(a,b)") to LaTeX.

    add/sub become infix "+"/"-", mul becomes "\\times", div becomes
    "\\frac{..}{..}". Leaves (variables/constants) pass through unchanged.

    Parameters
    ----------
    expr_str : str
        gplearn program string in prefix notation.

    Returns
    -------
    str
        Equivalent LaTeX expression.
    """
    expr_str = expr_str.strip()

    # Leaf node: numbers and variables do not start with an operator
    # initial (a/s/m/d), so return them verbatim.
    if expr_str[0] not in ('a', 's', 'm', 'd'):
        return expr_str

    # Operator name runs up to the first '('. (The original scanned with
    # a loop and left 'operation' unbound on malformed input.)
    op_end = expr_str.index('(')
    operation = expr_str[:op_end]
    inner = expr_str[op_end + 1:-1]  # drop the outer parentheses

    # Split the argument list on top-level commas only.
    args = []
    current = ""
    depth = 0
    for ch in inner:
        if ch == ',' and depth == 0:
            args.append(current.strip())
            current = ""
            continue
        if ch == '(':
            depth += 1
        elif ch == ')':
            depth -= 1
        current += ch
    args.append(current.strip())

    # Recurse into each argument before assembling this node.
    converted_args = [prefix_to_latex(arg) for arg in args]

    if operation == 'add':
        return "({} + {})".format(converted_args[0], converted_args[1])
    elif operation == 'sub':
        return "({} - {})".format(converted_args[0], converted_args[1])
    elif operation == 'mul':
        return "{{{0}}} \\times {{{1}}}".format(converted_args[0], converted_args[1])
    elif operation == 'div':
        return "\\frac{{{0}}}{{{1}}}".format(converted_args[0], converted_args[1])
    else:
        # Unknown operator: show only the converted arguments.
        return "({})".format(', '.join(converted_args))
|
|
|
+
|
|
|
expr_latex = prefix_to_latex(expr_str)
print("\nExpresión en LaTeX:")
print(expr_latex)

# Persist all three fitted expressions to a text report for later reference.
with open('expresion_simbolica.txt', 'w', encoding='utf-8') as f:
    f.write("EXPRESIONES OBTENIDAS - FLECHA_MEDIA = f(tw, tf)\n")
    f.write("="*80 + "\n")
    f.write("VARIABLES:\n")
    f.write("X0 = tw (espesor del alma / web thickness)\n")
    f.write("X1 = tf (espesor del ala / flange thickness)\n")
    f.write("="*80 + "\n\n")

    # 1) Polynomial model: explicit LaTeX equation plus the coefficient table.
    f.write("1. MODELO POLINÓMICO (R² = {:.4f})\n".format(r2))
    f.write("-"*80 + "\n")
    f.write("Ecuación en LaTeX:\n\n")
    f.write("Flecha_{{Media}} = {:.6f}".format(intercept))
    for feat, coef in zip(feature_names, linear_step.coef_):
        sign = "+" if coef >= 0 else "-"
        f.write(" {0} {1:.6f} \\cdot {2}".format(sign, abs(coef), feat))
    f.write("\n\n")
    f.write("Coeficientes:\n")
    f.write(coef_df.to_string())
    f.write("\n\n")

    # 2) Random forest: no closed-form equation, so report feature importances.
    f.write("2. RANDOM FOREST (R² = {:.4f})\n".format(r2_rf))
    f.write("-"*80 + "\n")
    f.write("El modelo Random Forest es un ensemble de 100 árboles de decisión.\n")
    f.write("No tiene una ecuación analítica cerrada.\n")
    f.write("Importancias de características (feature importance):\n")
    importances = rf.feature_importances_
    for name, imp in zip(['tw', 'tf'], importances):
        f.write(" - {0}: {1:.4f}\n".format(name, imp))
    f.write("\n")

    # 3) Symbolic regression: tree view, LaTeX form, and the raw program string.
    f.write("3. REGRESIÓN SIMBÓLICA / GENETIC PROGRAMMING (R² = {:.4f})\n".format(r2_gp))
    f.write("-"*80 + "\n")
    f.write("Árbol de operaciones:\n")
    f.write(expr_tree_formatted)
    f.write("\nEcuación en LaTeX:\n\n")
    f.write(expr_latex + "\n\n")
    f.write("Notación de árbol (formato original):\n")
    f.write(str(est_gp._program) + "\n\n")

    # Closing summary of the three R² scores.
    f.write("="*80 + "\n")
    f.write("RESUMEN R²:\n")
    f.write("   Polinómico:     {:.4f}\n".format(r2))
    f.write("   Random Forest:  {:.4f}\n".format(r2_rf))
    f.write("   Simbólico:      {:.4f}\n".format(r2_gp))
    f.write("="*80 + "\n")

print("\n✓ Expresión guardada en 'expresion_simbolica.txt'")
|
|
|
+
|
|
|
# Side-by-side summary of the three fitted models.
separator = "=" * 80
print(separator)
print("RESUMEN DE MODELOS:")
print(separator)
for label, score in [("R² Modelo Polinómico", r2),
                     ("R² Random Forest", r2_rf),
                     ("R² Regresión Simbólica", r2_gp)]:
    print(f"{label}: {score:.4f}")
print(separator + "\n")
|
|
|
+
|
|
|
# Build a 30x30 evaluation grid spanning the observed (tw, tf) ranges,
# used to visualise each model's fitted surface.
tw_min, tw_max = df_pareto['tw'].min(), df_pareto['tw'].max()
tf_min, tf_max = df_pareto['tf'].min(), df_pareto['tf'].max()

tw_range = np.linspace(tw_min, tw_max, 30)
tf_range = np.linspace(tf_min, tf_max, 30)
tw_mesh, tf_mesh = np.meshgrid(tw_range, tf_range)

# Polynomial-model predictions over the grid, reshaped back to 2-D.
X_plot = np.column_stack([tw_mesh.ravel(), tf_mesh.ravel()])
flecha_pred = model.predict(X_plot)
flecha_mesh = flecha_pred.reshape(tw_mesh.shape)
|
|
|
+
|
|
|
# Comparison figure: the three fitted surfaces over the real data (top row)
# and the corresponding residual plots (bottom row).

# Grid predictions for the two remaining models (the polynomial grid,
# flecha_mesh, was computed above).
flecha_pred_rf = rf.predict(X_plot)
flecha_mesh_rf = flecha_pred_rf.reshape(tw_mesh.shape)
flecha_pred_gp = est_gp.predict(X_plot)
flecha_mesh_gp = flecha_pred_gp.reshape(tw_mesh.shape)

fig = plt.figure(figsize=(16, 10))

# Top row: real data (red) + fitted surface for each model.
surface_panels = [
    (231, flecha_mesh, f'Modelo Polinómico (R²={r2:.4f})'),
    (232, flecha_mesh_rf, f'Random Forest (R²={r2_rf:.4f})'),
    (233, flecha_mesh_gp, f'Regresión Simbólica (R²={r2_gp:.4f})'),
]
for position, surface, title in surface_panels:
    axis = fig.add_subplot(position, projection='3d')
    axis.scatter(df_pareto['tw'], df_pareto['tf'], df_pareto['Flecha_Media'],
                 c='red', s=30, alpha=0.6, label='Datos reales')
    axis.plot_surface(tw_mesh, tf_mesh, surface, alpha=0.5, cmap='coolwarm')
    axis.set_xlabel('tw')
    axis.set_ylabel('tf')
    axis.set_zlabel('Flecha Media')
    axis.set_title(title)
    axis.legend()

# Bottom row: residuals (observed - predicted) against the predictions.
residual_panels = [
    (234, y_pred, 'Residuos - Modelo Polinómico'),
    (235, rf.predict(X), 'Residuos - Random Forest'),
    (236, y_pred_gp, 'Residuos - Regresión Simbólica'),
]
for position, predictions, title in residual_panels:
    axis = fig.add_subplot(position)
    axis.scatter(predictions, y - predictions, alpha=0.5)
    axis.axhline(y=0, color='r', linestyle='--')
    axis.set_xlabel('Predicciones')
    axis.set_ylabel('Residuos')
    axis.set_title(title)
    axis.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
|