
Python API

^ _ ^

Gradient Clip

'''To avoid gradient explosion
@parameters: an iterable of parameters whose gradients will be clipped
@max_norm: the maximum allowed norm of the gradients
@norm_type: the type of the p-norm used (`inf` for the infinity norm)
'''
# torch.nn.utils.clip_grad_norm_
import torch
from math import inf

def clip_grad_norm_(parameters, max_norm, norm_type=2):
    if isinstance(parameters, torch.Tensor):
        parameters = [parameters]
    parameters = list(filter(lambda p: p.grad is not None, parameters))
    max_norm = float(max_norm)
    norm_type = float(norm_type)
    if norm_type == inf:
        # infinity norm: the largest absolute gradient value
        total_norm = max(p.grad.data.abs().max() for p in parameters)
    else:
        # accumulate the p-norm over all parameters
        total_norm = 0
        for p in parameters:
            param_norm = p.grad.data.norm(norm_type)
            total_norm += param_norm.item() ** norm_type
        total_norm = total_norm ** (1. / norm_type)
    # scale all gradients down if the total norm exceeds max_norm
    clip_coef = max_norm / (total_norm + 1e-6)
    if clip_coef < 1:
        for p in parameters:
            p.grad.data.mul_(clip_coef)
    return total_norm
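
A minimal usage sketch (the model, data, and max_norm value below are illustrative assumptions): clip the total gradient norm after backward() and before the optimizer step.

import torch
import torch.nn as nn

model = nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
x, y = torch.randn(4, 10), torch.randn(4, 1)

loss = nn.functional.mse_loss(model(x), y)
optimizer.zero_grad()
loss.backward()
# clip the total gradient norm before updating the weights
torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
optimizer.step()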

TransformerEncoderLayer

'''torch.nn.TransformerEncoderLayer
@param d_model – the number of expected features in the input (required).
@param nhead – the number of heads in the multiheadattention models (required).
@param dim_feedforward – the dimension of the feedforward network model (default=2048).
@param dropout – the dropout value (default=0.1).
@param activation – the activation function of the intermediate layer, can be a string (“relu” or “gelu”) or a unary callable. Default: relu
@param layer_norm_eps – the eps value in layer normalization components (default=1e-5).
@param batch_first – If True, then the input and output tensors are provided as (batch, seq, feature). Default: False.
@param norm_first – if True, layer norm is done prior to attention and feedforward operations, respectively. Otherwise it’s done after. Default: False (after).
'''
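
A minimal usage sketch (the sizes d_model=512, nhead=8 and the random input are assumptions for illustration):

import torch
import torch.nn as nn

encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
src = torch.randn(10, 32, 512)   # (seq_len, batch, d_model) since batch_first=False
out = encoder_layer(src)         # same shape as src: (10, 32, 512)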

torch.expand

torch.arange(5).expand(3, 5)
'''
tensor([[0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4],
        [0, 1, 2, 3, 4]])
'''
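
A small follow-up sketch (the tensor is an illustrative assumption): expand() returns a view that shares storage with the original tensor, and -1 keeps that dimension's original size.

import torch

x = torch.tensor([[1], [2], [3]])   # shape (3, 1)
y = x.expand(-1, 4)                 # shape (3, 4); no new memory is allocated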

index_fill

'''pytorch -- index_fill
@param dim: the dimension along which to index
@param index: the indices to fill along `dim`
@param value: the value to fill with
'''

import torch

a = torch.randn(4, 3)
print(a)
# tensor([[-1.7189,  0.9798, -0.0428],
#         [ 0.7184, -0.2824, -1.0289],
#         [ 1.2858,  0.8423, -1.0473],
#         [-0.0269, -0.9876, -2.3126]])
index = torch.tensor([0, 2])
b = a.index_fill(1, index, 9)  # fill along dim 1 (columns 0 and 2)
print(b)
# tensor([[ 9.0000,  0.9798,  9.0000],
#         [ 9.0000, -0.2824,  9.0000],
#         [ 9.0000,  0.8423,  9.0000],
#         [ 9.0000, -0.9876,  9.0000]])

c = a.index_fill(0, index, 9)  # fill along dim 0 (rows 0 and 2)
print(c)
# tensor([[ 9.0000,  9.0000,  9.0000],
#         [ 0.7184, -0.2824, -1.0289],
#         [ 9.0000,  9.0000,  9.0000],
#         [-0.0269, -0.9876, -2.3126]])

torch.multinomial

'''torch.multinomial
Draw samples from every row of `input`, treating each row as sampling weights
@param input: the weights to sample from; must be a tensor
@param num_samples: the number of indices to draw per row
@param replacement: whether to sample with replacement (default False)
'''
weights = torch.Tensor([
    [0, 0.3, 0.7],
    [0.3, 0.7, 0]
])
torch.multinomial(weights, 2)
# >>> tensor([[2, 1],
#             [1, 0]])
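
A small follow-up sketch: with replacement=True the same index may be drawn more than once, so num_samples can exceed the number of columns (the exact output is random).

torch.multinomial(weights, 4, replacement=True)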

torch.div

'''Divide each element of `input` by `other`
@param input
@param other: the divisor; a scalar or a tensor broadcastable to `input`
'''
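
A minimal sketch with assumed values; `other` may be a scalar or a broadcastable tensor.

import torch

x = torch.tensor([2.0, 4.0, 6.0])
torch.div(x, 2)                               # tensor([1., 2., 3.])
torch.div(x, torch.tensor([1.0, 2.0, 3.0]))   # element-wise: tensor([2., 2., 2.])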

torch.bmm

'''batch matrix multiplication
torch.bmm(a, b)
a: (b, n, m)
b: (b, m, p)
result: (b, n, p)
'''
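
A minimal shape-check sketch with assumed batch and matrix sizes:

import torch

a = torch.randn(10, 3, 4)
b = torch.randn(10, 4, 5)
c = torch.bmm(a, b)
print(c.shape)   # torch.Size([10, 3, 5])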

torch.clamp

'''Clamp every element of `input` into the range [min, max]
@param input
@param min: lower bound
@param max: upper bound

      | min, if x_i < min
y_i = | x_i, if min <= x_i <= max
      | max, if x_i > max
'''
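
A minimal sketch with assumed values:

import torch

x = torch.tensor([-2.0, -0.5, 0.5, 2.0])
torch.clamp(x, min=-1.0, max=1.0)
# tensor([-1.0000, -0.5000,  0.5000,  1.0000])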

view

'''view
like `reshape`, returns a tensor with the same data but a new shape
'''
import torch
v1 = torch.arange(1, 17)  # 16 elements; torch.range is deprecated
v2 = v1.view(4, 4)

import torch
v1 = torch.arange(1, 17)
# -1 means this dimension is inferred from the total number of elements
v2 = v1.view(-1, 4)
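
A small follow-up sketch: view() requires contiguous memory, so after an operation such as t() either call .contiguous() first or use .reshape(), which copies only when necessary.

import torch

t = torch.arange(6).view(2, 3).t()   # transpose makes the tensor non-contiguous
t_flat = t.contiguous().view(-1)     # t.view(-1) alone would raise an error
t_flat2 = t.reshape(-1)              # reshape handles non-contiguous tensors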