How to use Dropout in PyTorch to reduce overfitting
A complete example is shown below:
import torch
import matplotlib.pyplot as plt

torch.manual_seed(1)

N_SAMPLES = 20
N_HIDDEN = 300

# training data
x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
y = x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))

# test data
test_x = torch.unsqueeze(torch.linspace(-1, 1, N_SAMPLES), 1)
test_y = test_x + 0.3 * torch.normal(torch.zeros(N_SAMPLES, 1), torch.ones(N_SAMPLES, 1))

# show data
# plt.scatter(x.numpy(), y.numpy(), c='magenta', s=50, alpha=0.5, label='train')
# plt.scatter(test_x.numpy(), test_y.numpy(), c='cyan', s=50, alpha=0.5, label='test')
# plt.legend(loc='upper left')
# plt.ylim((-2.5, 2.5))
# plt.show()

net_overfitting = torch.nn.Sequential(
    torch.nn.Linear(1, N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, 1),
)

net_dropped = torch.nn.Sequential(
    torch.nn.Linear(1, N_HIDDEN),
    torch.nn.Dropout(0.5),   # drop 50% of the activations
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, N_HIDDEN),
    torch.nn.Dropout(0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(N_HIDDEN, 1),
)

print(net_overfitting)
print(net_dropped)

optimizer_ofit = torch.optim.Adam(net_overfitting.parameters(), lr=0.01)
optimizer_drop = torch.optim.Adam(net_dropped.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()

plt.ion()

for t in range(500):
    pred_ofit = net_overfitting(x)
    pred_drop = net_dropped(x)
    loss_ofit = loss_func(pred_ofit, y)
    loss_drop = loss_func(pred_drop, y)

    optimizer_ofit.zero_grad()
    optimizer_drop.zero_grad()
    loss_ofit.backward()
    loss_drop.backward()
    optimizer_ofit.step()
    optimizer_drop.step()

    if t % 10 == 0:
        # switch to evaluation mode so dropout is disabled while testing
        net_overfitting.eval()
        net_dropped.eval()

        plt.cla()
        with torch.no_grad():
            test_pred_ofit = net_overfitting(test_x)
            test_pred_drop = net_dropped(test_x)
        plt.scatter(x.numpy(), y.numpy(), c='magenta', s=50, alpha=0.3, label='train')
        plt.scatter(test_x.numpy(), test_y.numpy(), c='cyan', s=50, alpha=0.3, label='test')
        plt.plot(test_x.numpy(), test_pred_ofit.numpy(), 'r-', lw=3, label='overfitting')
        plt.plot(test_x.numpy(), test_pred_drop.numpy(), 'b--', lw=3, label='dropout(50%)')
        plt.text(0, -1.2, 'overfitting loss=%.4f' % loss_func(test_pred_ofit, test_y).item(),
                 fontdict={'size': 20, 'color': 'red'})
        plt.text(0, -1.5, 'dropout loss=%.4f' % loss_func(test_pred_drop, test_y).item(),
                 fontdict={'size': 20, 'color': 'blue'})
        plt.legend(loc='upper left')
        plt.ylim((-2.5, 2.5))
        plt.pause(0.1)

        # switch back to training mode
        net_overfitting.train()
        net_dropped.train()

plt.ioff()
plt.show()
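The important detail in the loop above is that torch.nn.Dropout only drops activations while the network is in training mode; net.eval() switches dropout off for testing and net.train() switches it back on. A minimal sketch of this behaviour (the toy tensor is only for illustration):

import torch

drop = torch.nn.Dropout(p=0.5)
x = torch.ones(1, 8)

drop.train()      # training mode: roughly half the entries are zeroed,
print(drop(x))    # and the survivors are scaled by 1 / (1 - p) = 2

drop.eval()       # evaluation mode: Dropout acts as the identity
print(drop(x))    # the input comes back unchanged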
Supplement: implementing dropout in PyTorch to avoid overfitting
Consider a multilayer perceptron with a single hidden layer, 4 inputs and 5 hidden units. The i-th hidden unit is computed as

h_i = φ(x_1·w_1i + x_2·w_2i + x_3·w_3i + x_4·w_4i + b_i)

where φ is the activation function, x_1, ..., x_4 are the inputs, w_1i, ..., w_4i are the weights of hidden unit i, and b_i is its bias.
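This is also why the from-scratch dropout function implemented below rescales the surviving activations by keep_prob = 1 - p: if each h_i is zeroed with probability p and otherwise divided by 1 - p, the expected value of every hidden unit stays unchanged. Worked out (standard inverted-dropout reasoning, added here for clarity):

h_i' = 0                 with probability p
h_i' = h_i / (1 - p)     with probability 1 - p

E[h_i'] = p · 0 + (1 - p) · h_i / (1 - p) = h_i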
Now let's implement dropout from scratch to avoid overfitting.
Define the dropout function:
%matplotlib inline
import torch
import torch.nn as nn
import numpy as np

def dropout(X, drop_prob):
    X = X.float()
    assert 0 <= drop_prob <= 1
    keep_prob = 1 - drop_prob
    # in this case every element is dropped
    if keep_prob == 0:
        return torch.zeros_like(X)
    mask = (torch.rand(X.shape) < keep_prob).float()
    # rescale the kept elements so the expected value is unchanged
    return mask * X / keep_prob
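A quick sanity check of this function (the test tensor below is only an illustrative choice):

X = torch.arange(16).view(2, 8)
print(dropout(X, 0))    # drop nothing: the input is returned unchanged
print(dropout(X, 0.5))  # about half the entries become 0, the kept ones are doubled
print(dropout(X, 1))    # everything is dropped: an all-zero tensor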
Define the model parameters:
num_inputs, num_outputs, num_hiddens1, num_hiddens2 = 784, 10, 256, 256

W1 = torch.tensor(np.random.normal(0, 0.01, size=(num_inputs, num_hiddens1)),
                  dtype=torch.float, requires_grad=True)
b1 = torch.zeros(num_hiddens1, requires_grad=True)
W2 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens1, num_hiddens2)),
                  dtype=torch.float, requires_grad=True)
b2 = torch.zeros(num_hiddens2, requires_grad=True)
W3 = torch.tensor(np.random.normal(0, 0.01, size=(num_hiddens2, num_outputs)),
                  dtype=torch.float, requires_grad=True)
b3 = torch.zeros(num_outputs, requires_grad=True)

params = [W1, b1, W2, b2, W3, b3]
Define the model by chaining the fully connected layers and ReLU activations together and applying dropout to the output of each activation. The drop probability of each layer can be set individually; the usual advice is to use a smaller drop probability for layers closer to the input. In this experiment we set the drop probability of the first hidden layer to 0.2 and that of the second hidden layer to 0.5. The is_training argument indicates whether the model is running in training or evaluation mode, and dropout is applied only in training mode.
drop_prob1, drop_prob2 = 0.2, 0.5

def net(X, is_training=True):
    X = X.view(-1, num_inputs)
    H1 = (torch.matmul(X, W1) + b1).relu()
    if is_training:                        # apply dropout only while training
        H1 = dropout(H1, drop_prob1)       # dropout after the first fully connected layer
    H2 = (torch.matmul(H1, W2) + b2).relu()
    if is_training:
        H2 = dropout(H2, drop_prob2)       # dropout after the second fully connected layer
    return torch.matmul(H2, W3) + b3

def evaluate_accuracy(data_iter, net):
    acc_sum, n = 0.0, 0
    for X, y in data_iter:
        if isinstance(net, torch.nn.Module):
            net.eval()   # evaluation mode: this turns dropout off
            acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            net.train()  # switch back to training mode
        else:  # a hand-written model like ours
            if 'is_training' in net.__code__.co_varnames:
                # the model takes an is_training argument, so pass False
                acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
            else:
                acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
        n += y.shape[0]
    return acc_sum / n
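A quick way to see what the is_training flag does (the random input below is purely illustrative):

X_demo = torch.rand(2, num_inputs)
# two training-mode passes differ because each one samples a new dropout mask
print((net(X_demo) - net(X_demo)).abs().sum())
# evaluation-mode passes skip dropout, so they are deterministic
print((net(X_demo, is_training=False) - net(X_demo, is_training=False)).abs().sum())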
Train and test the model:
import sys
import torchvision
import torchvision.transforms

num_epochs, lr, batch_size = 5, 100.0, 256
loss = torch.nn.CrossEntropyLoss()

def load_data_fashion_mnist(batch_size, resize=None, root='~/Datasets/FashionMNIST'):
    """Download the Fashion-MNIST dataset and load it into memory."""
    trans = []
    if resize:
        trans.append(torchvision.transforms.Resize(size=resize))
    trans.append(torchvision.transforms.ToTensor())
    transform = torchvision.transforms.Compose(trans)
    mnist_train = torchvision.datasets.FashionMNIST(root=root, train=True, download=True, transform=transform)
    mnist_test = torchvision.datasets.FashionMNIST(root=root, train=False, download=True, transform=transform)
    if sys.platform.startswith('win'):
        num_workers = 0  # 0 means no extra worker processes are used to load data
    else:
        num_workers = 4
    train_iter = torch.utils.data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True, num_workers=num_workers)
    test_iter = torch.utils.data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False, num_workers=num_workers)
    return train_iter, test_iter

def sgd(params, lr, batch_size):
    # minibatch stochastic gradient descent (the standard helper from
    # Dive into Deep Learning, added here so the code runs on its own)
    for param in params:
        param.data -= lr * param.grad / batch_size

def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size,
              params=None, lr=None, optimizer=None):
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n = 0.0, 0.0, 0
        for X, y in train_iter:
            y_hat = net(X)
            l = loss(y_hat, y).sum()
            # clear the gradients
            if optimizer is not None:
                optimizer.zero_grad()
            elif params is not None and params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            l.backward()
            if optimizer is None:
                sgd(params, lr, batch_size)
            else:
                optimizer.step()  # used by the concise (nn.Module) version
            train_l_sum += l.item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().item()
            n += y.shape[0]
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f'
              % (epoch + 1, train_l_sum / n, train_acc_sum / n, test_acc))

train_iter, test_iter = load_data_fashion_mnist(batch_size)
train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params, lr)
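For comparison, the same two-hidden-layer network can also be written concisely with the built-in nn.Dropout layer, just like the Sequential models in the first example. The following is only a sketch added here for comparison (the variable name net_concise and the lr=0.5 setting are my own choices, not from the original post); it trains with the same train_ch3 by passing an optimizer:

# concise sketch: nn.Dropout replaces the hand-written dropout function
net_concise = nn.Sequential(
    nn.Flatten(),
    nn.Linear(num_inputs, num_hiddens1),
    nn.ReLU(),
    nn.Dropout(drop_prob1),
    nn.Linear(num_hiddens1, num_hiddens2),
    nn.ReLU(),
    nn.Dropout(drop_prob2),
    nn.Linear(num_hiddens2, num_outputs),
)
for param in net_concise.parameters():
    nn.init.normal_(param, mean=0, std=0.01)

optimizer = torch.optim.SGD(net_concise.parameters(), lr=0.5)
train_ch3(net_concise, train_iter, test_iter, loss, num_epochs, batch_size,
          None, None, optimizer)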
The above is based on my personal experience. I hope it can serve as a reference, and I hope you will continue to support 腳本之家.