深度学习 python pointnet中的T-net旋转网络

PointNet 网络中的 T-Net：T-Net 是一个微型网络，用于生成一个仿射变换矩阵，对点云的旋转、平移等变化进行规范化处理。这个变换/对齐网络本身就是一个微型的 PointNet：输入原始点云数据，输出一个 3×3 的变换矩阵（可近似看作旋转矩阵）。

1.输入的点云为（x，y，z），需要一个3*3的旋转矩阵

2.将点云映射到 k 维的冗余空间后，再学习一个 k×k 的特征变换矩阵。由于旋转矩阵具有正交性，因此需要在损失中引入一个正则化惩罚项 $L_{reg} = \lVert I - AA^{T} \rVert_F^2$（其中 A 为网络预测的 k×k 矩阵），使其尽可能接近一个正交矩阵。

在网上找到两段代码，方便大家学习一下！！！

# T-Net: is a pointnet itself.获取3x3的变换矩阵，校正点云姿态；效果一般，后续的改进并没有再加入这部分

# 经过全连接层映射到9个数据，最后调整为3x3矩阵

class STN3d(nn.Module):
    """Input T-Net: a small PointNet that regresses one 3x3 transform per cloud.

    A shared per-point MLP (kernel-size-1 convolutions) lifts every point to
    1024 features, a max over the point axis pools them into one vector per
    cloud, and three fully connected layers map that vector to 9 numbers.
    The identity matrix is added to those 9 numbers, so the network only has
    to learn the offset from "no transform".
    """

    def __init__(self):
        super(STN3d, self).__init__()
        # Shared per-point MLP: 3 -> 64 -> 128 -> 1024 channels.
        self.conv1 = torch.nn.Conv1d(3, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        # Fully connected head: 1024 -> 512 -> 256 -> 9 (a flattened 3x3).
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 9)
        # Not used by forward() (which calls F.relu); kept so the module's
        # attributes stay the same for existing callers.
        self.relu = nn.ReLU()
        # One batch-norm per conv / hidden fc layer.
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

    def forward(self, x):
        """Predict the 3x3 transform for each cloud in the batch.

        Args:
            x: tensor of shape (batch, 3, num_points), as required by the
                3-input-channel ``Conv1d`` in ``conv1``.

        Returns:
            Tensor of shape (batch, 3, 3).
        """
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        # Max over the point axis: (batch, 1024, N) -> (batch, 1024, 1).
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)

        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)

        # Flattened identity, created directly on x's device. This replaces
        # the deprecated Variable/NumPy construction followed by .cuda(),
        # which always targeted the default CUDA device rather than x's.
        # float32 is kept so type promotion matches the previous behaviour.
        iden = torch.eye(3, dtype=torch.float32, device=x.device).view(1, 9)
        # Broadcasting adds the same identity to every row of the batch.
        x = x + iden

        # (batch, 9) -> (batch, 3, 3)
        x = x.view(-1, 3, 3)
        return x

# 数据为k维，用于mlp之后的高维特征，同上

class STNkd(nn.Module):
    """Feature T-Net: regresses one k x k transform per cloud.

    Same layout as the 3-D input transform network, but it consumes
    k-channel per-point features (the output of an earlier MLP) and its last
    layer emits k*k numbers, to which the k x k identity is added.

    Args:
        k: number of input feature channels, and the side length of the
            predicted square matrix. Defaults to 64.
    """

    def __init__(self, k=64):
        super(STNkd, self).__init__()
        # Shared per-point MLP: k -> 64 -> 128 -> 1024 channels.
        self.conv1 = torch.nn.Conv1d(k, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        # Fully connected head: 1024 -> 512 -> 256 -> k*k.
        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, k*k)
        # Not used by forward() (which calls F.relu); kept so the module's
        # attributes stay the same for existing callers.
        self.relu = nn.ReLU()

        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.bn4 = nn.BatchNorm1d(512)
        self.bn5 = nn.BatchNorm1d(256)

        self.k = k

    def forward(self, x):
        """Predict the k x k transform for each cloud in the batch.

        Args:
            x: tensor of shape (batch, k, num_points), as required by the
                k-input-channel ``Conv1d`` in ``conv1``.

        Returns:
            Tensor of shape (batch, k, k).
        """
        x = F.relu(self.bn1(self.conv1(x)))
        x = F.relu(self.bn2(self.conv2(x)))
        x = F.relu(self.bn3(self.conv3(x)))
        # Max over the point axis: (batch, 1024, N) -> (batch, 1024, 1).
        x = torch.max(x, 2, keepdim=True)[0]
        x = x.view(-1, 1024)

        x = F.relu(self.bn4(self.fc1(x)))
        x = F.relu(self.bn5(self.fc2(x)))
        x = self.fc3(x)

        # Flattened k x k identity, created directly on x's device. This
        # replaces the deprecated Variable/NumPy construction followed by
        # .cuda(), which always targeted the default CUDA device.
        # float32 is kept so type promotion matches the previous behaviour.
        iden = torch.eye(self.k, dtype=torch.float32, device=x.device)
        iden = iden.view(1, self.k * self.k)
        # Broadcasting adds the same identity to every row of the batch.
        x = x + iden

        # (batch, k*k) -> (batch, k, k)
        x = x.view(-1, self.k, self.k)
        return x

#包含变换矩阵的中间网络

class PointNetfeat(nn.Module):
    """PointNet feature extractor, including the transform sub-networks.

    Args:
        global_feat: when True, forward() returns only the pooled
            1024-dimensional global feature; when False, it returns that
            feature tiled over every point and concatenated with the
            64-channel per-point feature (the form used for segmentation).
        feature_transform: when True, a 64x64 feature transform network is
            created and applied after the first convolution.
    """

    def __init__(self, global_feat = True, feature_transform = False):
        super(PointNetfeat, self).__init__()
        # Input transform network predicting a 3x3 matrix per cloud.
        self.stn = STN3d()
        # Shared per-point MLP: 3 -> 64 -> 128 -> 1024 channels.
        self.conv1 = torch.nn.Conv1d(3, 64, 1)
        self.conv2 = torch.nn.Conv1d(64, 128, 1)
        self.conv3 = torch.nn.Conv1d(128, 1024, 1)
        self.bn1 = nn.BatchNorm1d(64)
        self.bn2 = nn.BatchNorm1d(128)
        self.bn3 = nn.BatchNorm1d(1024)
        self.global_feat = global_feat
        self.feature_transform = feature_transform
        if self.feature_transform:
            # Optional 64x64 transform applied to the 64-channel features.
            self.fstn = STNkd(k=64)

    def forward(self, x):
        """Extract features from a batch of point clouds.

        Args:
            x: tensor of shape (batch, 3, num_points).

        Returns:
            A tuple ``(features, trans, trans_feat)``. ``trans`` is the
            output of the input transform network, and ``trans_feat`` is the
            output of the feature transform network, or None when
            ``feature_transform`` is off. ``features`` is (batch, 1024) when
            ``global_feat`` is True, otherwise (batch, 1088, num_points).
        """
        num_points = x.size(2)

        # Align the input: (B, 3, N) -> (B, N, 3) @ (B, 3, 3) -> (B, 3, N).
        trans = self.stn(x)
        x = torch.bmm(x.transpose(2, 1), trans).transpose(2, 1)

        # First MLP stage (the source's note says the original author used
        # two MLP layers here).
        x = F.relu(self.bn1(self.conv1(x)))

        trans_feat = None
        if self.feature_transform:
            # Align the 64-channel features the same way.
            trans_feat = self.fstn(x)
            x = torch.bmm(x.transpose(2, 1), trans_feat).transpose(2, 1)

        # Keep the per-point features for concatenation in the
        # segmentation branch below.
        pointfeat = x

        # Second MLP stage: 64 -> 128 -> 1024 (no ReLU after the last layer).
        x = F.relu(self.bn2(self.conv2(x)))
        x = self.bn3(self.conv3(x))

        # Max pooling over the point axis is a symmetric function, so the
        # result does not depend on the order of the points.
        pooled = x.max(dim=2, keepdim=True)[0]
        global_vec = pooled.view(-1, 1024)

        if self.global_feat:
            return global_vec, trans, trans_feat

        # Tile the global feature over every point and prepend it to the
        # per-point features: (B, 1024 + 64, N).
        tiled = global_vec.view(-1, 1024, 1).repeat(1, 1, num_points)
        return torch.cat([tiled, pointfeat], 1), trans, trans_feat

# K=3 代表输入的是原始点云，即每个点的维度 (x, y, z)。point_cloud 是一个 Tensor，属性如下：

# point_cloud=Tensor("Placeholder:0", shape=(32, 1024, 3), dtype=float32, device=/device:GPU:0)

def input_transform_net(point_cloud, is_training, bn_decay=None, K=3):
    """Input (XYZ) transform net: predict one 3xK matrix per point cloud.

    Args:
        point_cloud: BxNx3 tensor of point coordinates. The batch size and
            the number of points are read from its static shape.
        is_training: passed through to every ``tf_util`` layer.
        bn_decay: passed through to every ``tf_util`` layer.
        K: number of columns of the transform; only ``K == 3`` is
            supported (asserted below).

    Returns:
        Transformation matrix tensor of shape (B, 3, K).
    """
    batch_size = point_cloud.get_shape()[0].value  # clouds per batch
    num_point = point_cloud.get_shape()[1].value   # points per cloud

    # Append a trailing axis of size 1: BxNx3 -> BxNx3x1, so the cloud can
    # be fed to the 2-D convolution helper as a one-channel "image".
    input_image = tf.expand_dims(point_cloud, -1)

    # A [1,3] kernel spans the three coordinates of one point. Per the
    # original notes this maps BxNx3x1 to BxNx1x64 (tf_util is not visible
    # here; confirm against its implementation).
    net = tf_util.conv2d(input_image, 64, [1,3],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='tconv1', bn_decay=bn_decay)
    # [1,1] kernels act on each point independently: 64 -> 128 channels.
    net = tf_util.conv2d(net, 128, [1,1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='tconv2', bn_decay=bn_decay)
    # 128 -> 1024 channels.
    net = tf_util.conv2d(net, 1024, [1, 1],
                         padding='VALID', stride=[1,1],
                         bn=True, is_training=is_training,
                         scope='tconv3', bn_decay=bn_decay)
    # Pool over all num_point points; per the original notes this maps
    # BxNx1x1024 to Bx1x1x1024.
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='tmaxpool')

    # Flatten to one feature vector per cloud, then 1024 -> 512 -> 256.
    net = tf.reshape(net, [batch_size, -1])
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                                  scope='tfc1', bn_decay=bn_decay)
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='tfc2', bn_decay=bn_decay)

    with tf.variable_scope('transform_XYZ') as sc:
        assert(K==3)
        # Weights and biases both start at zero ...
        weights = tf.get_variable('weights', [256, 3*K],
                                  initializer=tf.constant_initializer(0.0),
                                  dtype=tf.float32)
        biases = tf.get_variable('biases', [3*K],
                                 initializer=tf.constant_initializer(0.0),
                                 dtype=tf.float32)
        # ... and the flattened 3x3 identity is added to the biases, so the
        # predicted transform is exactly the identity before training.
        biases += tf.constant([1,0,0,0,1,0,0,0,1], dtype=tf.float32)
        # (B, 256) x (256, 3*K) -> (B, 3*K)
        transform = tf.matmul(net, weights)
        transform = tf.nn.bias_add(transform, biases)

    # (B, 3*K) -> (B, 3, K)
    transform = tf.reshape(transform, [batch_size, 3, K])
    # The original line had stray text fused to the identifier
    # ("transform码片"), which referred to an undefined name.
    return transform

参考阅读

评论可见，请评论后查看内容，谢谢！！！评论后请刷新页面。

夸智网

深度学习 python pointnet中的T-net旋转网络

linux 网络入门基础

php 开发语言智算中心网络架构设计实践（2023）

发表评论取消回复

夸智网

深度学习 python pointnet中的T-net旋转网络

linux 网络入门基础

php 开发语言 智算中心网络架构设计实践（2023）

相关文章

发表评论取消回复

php 开发语言智算中心网络架构设计实践（2023）