import tensorflow as tf
from tensorflow.keras import datasets
# Load the MNIST handwritten-digit dataset via the Keras built-in loader;
# unpacks into (train images, train labels), (test images, test labels).
# Downloads on first call, then reads from the local Keras cache.
(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()  # load from the built-in handwritten-digit dataset
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Read the CSV into a DataFrame, then separate features from the target.
df = pd.read_csv("data.csv")

# Every column except "label" becomes the feature matrix; `.values`
# strips the pandas row/column index and returns a plain ndarray.
feature_matrix = df.drop("label", axis=1).values
target = df["label"].values

# Hold out 20% of the rows as a test set; the fixed seed makes the
# shuffled split reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    feature_matrix,
    target,
    test_size=0.2,     # 20% goes to the test set
    random_state=42,   # fixed seed for reproducibility
    shuffle=True,      # shuffle rows before splitting
)
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

# Load the raw table; fail fast on an empty file so the column slicing
# below cannot silently produce zero-row arrays.
df = pd.read_csv("data.csv")
if df.empty:
    raise ValueError("DataFrame is empty")

# Layout convention: the first `num_features` columns are inputs,
# every remaining column is a target.
num_features = 5
x = df.iloc[:, :num_features].to_numpy(dtype=np.float32)
y = df.iloc[:, num_features:].to_numpy(dtype=np.float32)

# Reproducible shuffled 80/20 train/test split.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=True,
)
归一化:让数据落在 [0,1],训练更稳定、更快
import tensorflow as tf
import numpy as np

# Small demo matrix: floats 1..9 laid out as 3 rows of 3.
sample = np.arange(1.0, 10.0, dtype=np.float32).reshape(3, 3)

# Scale each row to unit norm; axis=1 means "normalize along each row".
normalized = tf.keras.utils.normalize(sample, axis=1)
print(normalized)

# For image pixels a plain [0, 1] rescale is sufficient: divide the
# 0-255 intensity values by 255.
x_train = x_train / 255.0
x_test = x_test / 255.0
构建模型(定义神经网络结构)
from tensorflow.keras import models, layers

# Feed-forward classifier for 28x28 grayscale digit images:
# flatten to a 784-vector, one ReLU hidden layer, softmax over 10 classes.
model = models.Sequential()
model.add(layers.Flatten(input_shape=(28, 28)))    # 28x28 image -> 784-vector
model.add(layers.Dense(128, activation='relu'))    # hidden layer: learn features
model.add(layers.Dense(10, activation='softmax'))  # output: probability per class
| 层 | 作用 |
|---|---|
| Flatten | 把 28×28 → 784 |
| Dense(128) | 学习特征 |
| Dense(10) | 输出 10 类概率 |
# Configure training: how prediction error is measured, how weights are
# updated, and which extra metric is reported during fit/evaluate.
model.compile(
    loss='sparse_categorical_crossentropy',  # integer class labels, no one-hot needed
    optimizer='adam',                        # adaptive optimizer, good default
    metrics=['accuracy'],                    # reported alongside the loss
)
| 参数 | 意义 |
|---|---|
| optimizer | 怎么更新参数 |
| loss | 怎么衡量预测错误 |
| metrics | 训练时显示什么指标 |
| 优化器 | 简介 | 常用参数 |
|---|---|---|
| SGD | 随机梯度下降,基础优化器,可带动量 | learning_rate, momentum, nesterov |
| RMSprop | 自适应学习率优化器,适合RNN | learning_rate, rho, momentum |
| Adam | 自适应矩估计优化器,训练速度快,最常用 | learning_rate, beta_1, beta_2, epsilon |
| Adagrad | 自适应学习率,适合稀疏数据 | learning_rate, epsilon |
| Adadelta | Adagrad改进版本,自动调整学习率 | learning_rate, rho, epsilon |
| Nadam | Adam + Nesterov动量 | learning_rate, beta_1, beta_2, epsilon |
| 任务类型 | 常用 Loss | 说明 |
|---|---|---|
| 二分类 | binary_crossentropy | 输出 1 个节点,sigmoid 激活 |
| 多分类(one-hot) | categorical_crossentropy | 输出多节点,softmax 激活 |
| 多分类(整数标签) | sparse_categorical_crossentropy | 标签为整数,不用 one-hot |
| Loss | 说明 |
|---|---|
| mse (Mean Squared Error) | 均方误差,常用 |
| mae (Mean Absolute Error) | 平均绝对误差,鲁棒 |
| huber | Huber 损失,兼顾 MSE 与 MAE |
分类指标:accuracy / categorical_accuracy / sparse_categorical_accuracy,以及 Precision(精确率)、Recall(召回率)、AUC(ROC 曲线下面积)
回归指标:mae(平均绝对误差)、mse(均方误差)、mape(平均百分比误差)

loss, accuracy = model.evaluate(x_test, y_test, batch_size=32, verbose=1)
x_test:输入特征数据
y_test:对应标签
batch_size:每次处理多少样本,默认32
verbose:输出信息,0表示静默,1表示进度条
compile 时只指定了 loss,就只返回一个 loss;指定了 metrics=['accuracy'],则返回 [loss, accuracy]:

loss, accuracy = model.evaluate(x_test, y_test)
# Report held-out performance (loss/accuracy come from model.evaluate).
print("测试集损失:", loss)
print("测试集准确率:", accuracy)
# Per-sample class probabilities; for the Dense(10, softmax) model above
# the result has 10 columns, one per class.
pred = model.predict(x_test)
# Persist the trained model and reload it from disk.
# NOTE(review): with no file extension, Keras 2 writes TF SavedModel format,
# but Keras 3 requires a ".keras" (or ".h5") suffix — confirm the TF version.
model.save("mnist_model")
model = tf.keras.models.load_model("mnist_model")