In [11]:
# %load pytorch_sample_nn.py

# https://pytorch.org/tutorials/beginner/pytorch_with_examples.html

import torch
import math


# Sample the target function: 2000 evenly spaced inputs covering [-pi, pi]
# and the value of sin at each one.
x = torch.linspace(-math.pi, math.pi, steps=2000)
y = torch.sin(x)

# The fit is y ~ a + b*x + c*x^2 + d*x^3, which is linear in the features
# (x, x^2, x^3), so a single linear layer is enough. `p` holds the exponents
# 1, 2, 3; raising the (2000, 1) column x.unsqueeze(-1) to the (3,) tensor `p`
# broadcasts to a (2000, 3) feature matrix.
p = torch.arange(1, 4)
xx = torch.pow(x.unsqueeze(-1), p)

# nn.Sequential chains modules and feeds each one's output into the next.
# Linear(3, 1) owns the weight and bias being learned; Flatten(0, 1) then
# collapses the (2000, 1) result into a 1D tensor so it lines up with `y`.
model = torch.nn.Sequential(torch.nn.Linear(3, 1), torch.nn.Flatten(0, 1))

# Sum (rather than mean) of squared errors over all samples.
loss_fn = torch.nn.MSELoss(reduction='sum')

learning_rate = 1e-6
for t in range(20000):
    # Forward pass: calling the module runs every layer on the features.
    y_pred = model(xx)

    # Scalar loss tensor; report it on every 100th step (t = 99, 199, ...).
    loss = loss_fn(y_pred, y)
    if (t + 1) % 100 == 0:
        print(t, loss.item())

    # Clear gradients left over from the previous step, then backpropagate
    # to populate .grad on every learnable parameter of the model.
    model.zero_grad()
    loss.backward()

    # Plain gradient-descent step. The in-place update is done under
    # no_grad so that it is not itself recorded by autograd.
    with torch.no_grad():
        for param in model.parameters():
            param.sub_(learning_rate * param.grad)

# Sequential supports list-style indexing; item 0 is the Linear layer.
linear_layer = model[0]

# The Linear layer keeps its parameters in `bias` (shape (1,)) and `weight`
# (shape (1, 3)); pull them out as Python floats for the report.
bias = linear_layer.bias.item()
w1, w2, w3 = (w.item() for w in linear_layer.weight[0])
print(f'Result: y = {bias} + {w1} x + {w2} x^2 + {w3} x^3')


99 1079.5560302734375
199 716.9285888671875
299 477.11639404296875
399 318.5227966308594
499 213.6402587890625
599 144.27780151367188
699 98.40565490722656
799 68.06826782226562
899 48.0045051574707
999 34.73521423339844
1099 25.959197998046875
1199 20.155025482177734
1299 16.31621742248535
1399 13.777246475219727
1499 12.097970962524414
1599 10.98727035522461
1699 10.252596855163574
1799 9.766687393188477
1899 9.44528579711914
1999 9.232666969299316
2099 9.092034339904785
2199 8.999002456665039
2299 8.937461853027344
2399 8.89675235748291
2499 8.869823455810547
2599 8.852007865905762
2699 8.84022045135498
2799 8.832422256469727
2899 8.827260971069336
2999 8.823847770690918
3099 8.821588516235352
3199 8.820094108581543
3299 8.819103240966797
3399 8.818449974060059
3499 8.81801700592041
3599 8.817729949951172
3699 8.817540168762207
3799 8.817415237426758
3899 8.817331314086914
3999 8.817276954650879
4099 8.817238807678223
4199 8.817214965820312
4299 8.81719970703125
4399 8.8171882629394