
Python API

^ _ ^

Gradient Clip

'''To avoid gradient explosion
@parameters: an iterable of parameters whose gradients will be clipped
@max_norm: the maximum allowed norm of the gradients
@norm_type: the type of the p-norm used (`inf` for the infinity norm)
'''
# torch.nn.utils.clip_grad_norm_
import torch
from math import inf

def clip_grad_norm_(parameters, max_norm, norm_type=2):
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    parameters = list(filter(lambda p: p.grad is not None, parameters))
    max_norm = float(max_norm)
    norm_type = float(norm_type)
    if norm_type == inf:
        # infinity norm: the largest absolute gradient value
        total_norm = max(p.grad.data.abs().max() for p in parameters)
    else:
        # accumulate the p-norm over all parameters
        total_norm = 0
        for p in parameters:
            param_norm = p.grad.data.norm(norm_type)
            total_norm += param_norm.item() ** norm_type
        total_norm = total_norm ** (1. / norm_type)
    # scale all gradients down if the total norm exceeds max_norm
    clip_coef = max_norm / (total_norm + 1e-6)
    if clip_coef < 1:
        for p in parameters:
            p.grad.data.mul_(clip_coef)
    return total_norm
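
A minimal usage sketch (the model, data, and max_norm value below are illustrative assumptions): clip the total gradient norm after backward() and before the optimizer step.

import torch
import torch.nn as nn

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
x, y = torch.randn(4, 10), torch.randn(4, 1)

loss = nn.functional.mse_loss(model(x), y)
optimizer.zero_grad()
loss.backward()
# clip the total gradient norm before updating the weights
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()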

TransformerEncoderLayer

'''torch.nn.TransformerEncoderLayer
@param d_model – the number of expected features in the input (required).
@param nhead – the number of heads in the multiheadattention models (required).
@param dim_feedforward – the dimension of the feedforward network model (default=2048).
@param dropout – the dropout value (default=0.1).
@param activation – the activation function of the intermediate layer, can be a string (“relu” or “gelu”) or a unary callable. Default: relu
@param layer_norm_eps – the eps value in layer normalization components (default=1e-5).
@param batch_first – If True, then the input and output tensors are provided as (batch, seq, feature). Default: False.
@param norm_first – if True, layer norm is done prior to attention and feedforward operations, respectively. Otherwise it’s done after. Default: False (after).
'''
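
A minimal usage sketch (the sizes d_model=512, nhead=8 and the random input are assumptions for illustration):

import torch
import torch.nn as nn

encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
src = torch.randn(10, 32, 512)   # (seq_len, batch, d_model) since batch_first=False
out = encoder_layer(src)         # same shape as src: (10, 32, 512)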

torch.expand

torch.arange(5).expand(3, 5)
'''
tensor([[0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4]])
'''
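
A small follow-up sketch (the tensor is an illustrative assumption): expand() returns a view that shares storage with the original tensor, and -1 keeps that dimension's original size.

import torch

x = torch.tensor([[1], [2], [3]])   # shape (3, 1)
y = x.expand(-1, 4)                 # shape (3, 4); no new memory is allocated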

index_fill

'''pytorch -- index_fill
@param dim: the dimension along which to index
@param index: the indices to fill along `dim`
@param value: the value to fill with
'''

import torch

a = torch.randn(4, 3)
print(a)
# tensor([[-1.7189,  0.9798, -0.0428],
#         [ 0.7184, -0.2824, -1.0289],
#         [ 1.2858,  0.8423, -1.0473],
#         [-0.0269, -0.9876, -2.3126]])
index = torch.tensor([0, 2])
b = a.index_fill(1, index, 9)  # fill along dim 1 (columns 0 and 2)
print(b)
# tensor([[ 9.0000,  0.9798,  9.0000],
#         [ 9.0000, -0.2824,  9.0000],
#         [ 9.0000,  0.8423,  9.0000],
#         [ 9.0000, -0.9876,  9.0000]])

c = a.index_fill(0, index, 9)  # fill along dim 0 (rows 0 and 2)
print(c)
# tensor([[ 9.0000,  9.0000,  9.0000],
#         [ 0.7184, -0.2824, -1.0289],
#         [ 9.0000,  9.0000,  9.0000],
#         [-0.0269, -0.9876, -2.3126]])

torch.multinomial

'''torch.multinomial
Draw samples from every row of `input`, treating each row as sampling weights
@param input: the weights to sample from; must be a tensor
@param num_samples: the number of indices to draw per row
@param replacement: whether to sample with replacement (default False)
'''
weights = torch.Tensor([
    [0, 0.3, 0.7],
    [0.3, 0.7, 0]
])
torch.multinomial(weights, 2)
# >>> tensor([[2, 1],
#             [1, 0]])
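
A small follow-up sketch: with replacement=True the same index may be drawn more than once, so num_samples can exceed the number of columns (the exact output is random).

torch.multinomial(weights, 4, replacement=True)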

torch.div

'''Divide each element of `input` by `other`
@param input
@param other: the divisor; a scalar or a tensor broadcastable to `input`
'''
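
A minimal sketch with assumed values; `other` may be a scalar or a broadcastable tensor.

import torch

x = torch.tensor([2.0, 4.0, 6.0])
torch.div(x, 2)                               # tensor([1., 2., 3.])
torch.div(x, torch.tensor([1.0, 2.0, 3.0]))   # element-wise: tensor([2., 2., 2.])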

torch.bmm

'''batch matrix multiplication
torch.bmm(a, b)
a: (b, n, m)
b: (b, m, p)
result: (b, n, p)
'''
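
A minimal shape-check sketch with assumed batch and matrix sizes:

import torch

a = torch.randn(10, 3, 4)
b = torch.randn(10, 4, 5)
c = torch.bmm(a, b)
print(c.shape)   # torch.Size([10, 3, 5])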

torch.clamp

'''Clamp every element of `input` into the range [min, max]
@param input
@param min: lower bound
@param max: upper bound

      | min, if x_i < min
y_i = | x_i, if min <= x_i <= max
      | max, if x_i > max
'''
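
A minimal sketch with assumed values:

import torch

x = torch.tensor([-2.0, -0.5, 0.5, 2.0])
torch.clamp(x, min=-1.0, max=1.0)
# tensor([-1.0000, -0.5000,  0.5000,  1.0000])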

view

'''view
like `reshape`, returns a tensor with the same data but a new shape
'''
import torch
v1 = torch.arange(1, 17)  # 16 elements; torch.range is deprecated
v2 = v1.view(4, 4)

import torch
v1 = torch.arange(1, 17)
# -1 means this dimension is inferred from the total number of elements
v2 = v1.view(-1, 4)
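
A small follow-up sketch: view() requires contiguous memory, so after an operation such as t() either call .contiguous() first or use .reshape(), which copies only when necessary.

import torch

t = torch.arange(6).view(2, 3).t()   # transpose makes the tensor non-contiguous
t_flat = t.contiguous().view(-1)     # t.view(-1) alone would raise an error
t_flat2 = t.reshape(-1)              # reshape handles non-contiguous tensors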