# Pseudo-code sketch (indentation lost in extraction): the outer loop walks
# epochs, the inner loop walks every sample of the dataset.
data = Iris.dataset()
# training loop
for epoch in range(100):
# y_predicted = model(x) is in fact equivalent to:
for x, y in data:
# forward + backward + weight updates
上面有两层 for loop:第一层决定了我们要把所有资料训练几次(epochs),而第二层则是我们的 dataset size。
# Pseudo-code sketch (indentation lost in extraction): instead of iterating
# single samples, the inner loop now walks mini-batches.
# training loop
for epoch in range(100):
# loop over all batches
for i in range(total_batches):
x_batch, y_batch = ...
# Pseudo-code sketch: parameterize the epoch count instead of hard-coding it.
epochs = nums # a number
# training loop
for epoch in range(epochs):
...
接下来使用 torch.utils.data 里面的 Dataset 类别。
import torch
from torch.utils.data import Dataset
# 这两个是资料处理常用的套件
import numpy as np
import pandas as pd
# example of dataset create
# example of dataset create
class ExampleDataset(Dataset):
    """Map-style dataset backed by a CSV file.

    The first CSV column is taken as the label and the remaining columns
    as the features; the first row is assumed to be a header and skipped.
    """

    # data loading
    def __init__(self, csv_path='./dataset-example.csv'):
        # Load the whole CSV into memory once (header row skipped).
        # csv_path is parameterized (default keeps the original path) so the
        # same class works for any CSV laid out as [label, feat1, feat2, ...].
        xy = np.loadtxt(csv_path, delimiter=',', dtype=np.float32, skiprows=1)
        # Features: every column except the first.
        self.x = torch.from_numpy(xy[:, 1:])
        # Labels: first column only, kept 2-D with shape (n_samples, 1).
        self.y = torch.from_numpy(xy[:, [0]])
        self.n_samples = xy.shape[0]

    # working for indexing
    def __getitem__(self, index):
        """Return the (features, label) pair at *index*."""
        return self.x[index], self.y[index]

    # return the length of our dataset
    def __len__(self):
        """Return the number of samples loaded from the CSV."""
        return self.n_samples
在 __init__ 里面整理好资料,__getitem__ 的 return 回传索引到的单笔资料,__len__ 的 return 则去回传资料长度。

dataset = ExampleDataset()
# Peek at the first sample: unpack its features/label directly,
# then show the total number of samples.
features, labels = dataset[0]
print(features, labels)
print(len(dataset))
资料长度则可以直接用 len() 去做显示。

from torch.utils.data.sampler import SubsetRandomSampler

SubsetRandomSampler 提供一种迭代数据集元素索引的方法,以及一个返回迭代器长度的 __len__() 方法。
# create data
...

# --- train/test split configuration ---
test_split = 0.2          # fraction of samples held out for testing
shuffle_dataset = True    # shuffle indices before splitting?
random_seed = 1234        # seed used for the shuffle, for reproducibility

# Build index lists for the training and testing subsets.
dataset_size = len(dataset)
indices = list(range(dataset_size))
# Number of samples that go to the test subset.
split = int(np.floor(test_split * dataset_size))
if shuffle_dataset:
    np.random.seed(random_seed)
    np.random.shuffle(indices)
train_indices, test_indices = indices[split:], indices[:split]

# Create data samplers and loaders: each sampler draws only from its
# own index subset, so the two loaders never share a sample.
train_sampler = SubsetRandomSampler(train_indices)
test_sampler = SubsetRandomSampler(test_indices)
train_loader = DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
test_loader = DataLoader(dataset, batch_size=batch_size, sampler=test_sampler)
# example of dataloader use
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True)
dataiter = iter(dataloader)
# Fetch one batch. NOTE: the old `dataiter.next()` no longer works —
# Python 3 iterators (and PyTorch >= 1.13) expose __next__, so the
# builtin next() must be used instead.
data = next(dataiter)
features, labels = data
print(features, labels)
# tensor([[0., 0., 0., ..., 0., 0., 0.],
# [0., 0., 0., ..., 0., 0., 0.],
# [0., 0., 0., ..., 0., 0., 0.],
# [0., 0., 0., ..., 0., 0., 0.]]) tensor([[1.],
# [0.],
# [1.],
# [4.]])
# example of dataloader use
dataloader = DataLoader(dataset=dataset, batch_size=4, shuffle=True)

# training loop
epochs = 2
total_samples = len(dataset)
# np.ceil returns a float, which is why the sample output shows "3.0".
n_iterations = np.ceil(total_samples / 4)
print(total_samples, n_iterations)
for epoch in range(epochs):
    for i, (batch_features, batch_targets) in enumerate(dataloader):
        # forward backward pass, update
        # `% 1` is always 0, so this logs on every single step.
        if (i + 1) % 1 == 0:
            print(f'epoch {epoch+1}/{epochs}, step{i+1}/{n_iterations}')
# 9 3.0
# epoch 1/2, step1/3.0
# epoch 1/2, step2/3.0
# epoch 1/2, step3/3.0
# epoch 2/2, step1/3.0
# epoch 2/2, step2/3.0
# epoch 2/2, step3/3.0
<< Day22: WAF、Firewall Manager、Shield 简介
找水电、装潢师傅,大家都怎麽找? 亲友介绍?路边的实体店面?看附近有没有贴纸? 那些年,遇到的水电装...
本篇文章同步发表在 HKT 线上教室 部落格,线上影音教学课程已上架至 Udemy 和 Youtu...
数据管理员(Data Steward) 数据管理员是组织中的一个角色,负责利用组织的数据治理流程来...
前言 想透过 phpMyAdmin 把正式机资料拉下来,汇入本机 docker 上的资料库做开发;但...
昨天我们讲了Vue的一生,今天来说说模板语法,看看要怎麽把vue instance中的资料变化渲染到...