pointnet网络中t-net网络 T−Net是一个微型网络,用于生成一个仿射变换矩阵来对点云的旋转、平移等变化进行规范化处理。这个变换/对齐网络是一个微型的PointNet,它输入原始点云数据,输出为一个3∗3 的旋转矩阵

1.输入的点云为(x,y,z),需要一个3*3的旋转矩阵

2.将点云映射到k维的冗余空间,学习一个k*k的旋转矩阵,由于旋转矩阵具有正交性,因此校对需要引入一个正则化的惩罚项,希望尽可能接近于一个正交矩阵 在网上找到两段代码,方便大家学习一下!!!

# T-Net: is a pointnet itself.获取3x3的变换矩阵,校正点云姿态;效果一般,后续的改进并没有再加入这部分

# 经过全连接层映射到9个数据,最后调整为3x3矩阵

class STN3d(nn.Module):

def __init__(self):

super(STN3d, self).__init__()

#mlp

self.conv1 = torch.nn.Conv1d(3, 64, 1)

self.conv2 = torch.nn.Conv1d(64, 128, 1)

self.conv3 = torch.nn.Conv1d(128, 1024, 1)

#fc

self.fc1 = nn.Linear(1024, 512)

self.fc2 = nn.Linear(512, 256)

self.fc3 = nn.Linear(256, 9)

#激活函数

self.relu = nn.ReLU()

#bn

self.bn1 = nn.BatchNorm1d(64)

self.bn2 = nn.BatchNorm1d(128)

self.bn3 = nn.BatchNorm1d(1024)

self.bn4 = nn.BatchNorm1d(512)

self.bn5 = nn.BatchNorm1d(256)

def forward(self, x):

batchsize = x.size()[0]

x = F.relu(self.bn1(self.conv1(x)))

x = F.relu(self.bn2(self.conv2(x)))

x = F.relu(self.bn3(self.conv3(x)))

x = torch.max(x, 2, keepdim=True)[0]

x = x.view(-1, 1024)

x = F.relu(self.bn4(self.fc1(x)))

x = F.relu(self.bn5(self.fc2(x)))

x = self.fc3(x)

# Variable已被弃用,之前的版本中,pytorch的tensor只能在CPU计算,Variable将tensor转换成variable,具有三个属性(data\grad\grad_fn)

# 现在二者已经融合,Variable返回tensor

# iden生成单位变换矩阵

# repeat(batchsize, 1),重复batchsize次,生成batchsize x 9的tensor

iden = Variable(torch.from_numpy(np.array([1,0,0,0,1,0,0,0,1]).astype(np.float32))).view(1,9).repeat(batchsize,1)

#将单位矩阵送入GPU

if x.is_cuda:

iden = iden.cuda()

x = x + iden

# view()相当于numpy中的resize(),重构tensor维度,-1表示缺省参数由系统自动计算(为batchsize大小)

# 返回结果为 batchsize x 3 x 3

x = x.view(-1, 3, 3)

return x

# 数据为k维,用于mlp之后的高维特征,同上

class STNkd(nn.Module):

def __init__(self, k=64):

super(STNkd, self).__init__()

self.conv1 = torch.nn.Conv1d(k, 64, 1)

self.conv2 = torch.nn.Conv1d(64, 128, 1)

self.conv3 = torch.nn.Conv1d(128, 1024, 1)

self.fc1 = nn.Linear(1024, 512)

self.fc2 = nn.Linear(512, 256)

self.fc3 = nn.Linear(256, k*k)

self.relu = nn.ReLU()

self.bn1 = nn.BatchNorm1d(64)

self.bn2 = nn.BatchNorm1d(128)

self.bn3 = nn.BatchNorm1d(1024)

self.bn4 = nn.BatchNorm1d(512)

self.bn5 = nn.BatchNorm1d(256)

self.k = k

def forward(self, x):

batchsize = x.size()[0]

x = F.relu(self.bn1(self.conv1(x)))

x = F.relu(self.bn2(self.conv2(x)))

x = F.relu(self.bn3(self.conv3(x)))

x = torch.max(x, 2, keepdim=True)[0]

x = x.view(-1, 1024)

x = F.relu(self.bn4(self.fc1(x)))

x = F.relu(self.bn5(self.fc2(x)))

x = self.fc3(x)

iden = Variable(torch.from_numpy(np.eye(self.k).flatten().astype(np.float32))).view(1,self.k*self.k).repeat(batchsize,1)

if x.is_cuda:

iden = iden.cuda()

x = x + iden

x = x.view(-1, self.k, self.k)

return x

#包含变换矩阵的中间网络

class PointNetfeat(nn.Module):

def __init__(self, global_feat = True, feature_transform = False):

super(PointNetfeat, self).__init__()

self.stn = STN3d()

self.conv1 = torch.nn.Conv1d(3, 64, 1)

self.conv2 = torch.nn.Conv1d(64, 128, 1)

self.conv3 = torch.nn.Conv1d(128, 1024, 1)

self.bn1 = nn.BatchNorm1d(64)

self.bn2 = nn.BatchNorm1d(128)

self.bn3 = nn.BatchNorm1d(1024)

self.global_feat = global_feat

self.feature_transform = feature_transform

if self.feature_transform:

self.fstn = STNkd(k=64)

def forward(self, x):

n_pts = x.size()[2]# size()返回张量各个维度的尺度

trans = self.stn(x)#得到3x3的坐标变换矩阵

x = x.transpose(2, 1)#调整点的维度,将点云数据转换为nx3形式,便于和旋转矩阵计算

x = torch.bmm(x, trans)#点的坐标和3x3的变换矩阵相乘

x = x.transpose(2, 1)#再把点的坐标调整回来3xn

x = F.relu(self.bn1(self.conv1(x)))#作者本来在这里用了两次mlp

if self.feature_transform:

trans_feat = self.fstn(x)#得到64x64的特征变换矩阵

x = x.transpose(2,1)

x = torch.bmm(x, trans_feat)

x = x.transpose(2,1)

else:

trans_feat = None

pointfeat = x# 保留经过第一次mlp的特征,便于后续分割进行特征拼接融合

x = F.relu(self.bn2(self.conv2(x)))# 第二次mlp的第一层,64->128

x = self.bn3(self.conv3(x))# 第二次mlp的第二层,128->1024

x = torch.max(x, 2, keepdim=True)[0]# pointnet的核心操作,最大池化操作保证了点云的置换不变性(最大池化操作为对称函数)

x = x.view(-1, 1024)# resize池化结果的形状,获得全局1024维特征

if self.global_feat:

return x, trans, trans_feat#返回特征、坐标变换矩阵、特征变换矩阵

else:

x = x.view(-1, 1024, 1).repeat(1, 1, n_pts)

return torch.cat([x, pointfeat], 1), trans, trans_feat#分割时候会用到的global特征、坐标变换矩阵、特征变换矩阵

在这里插入代# K=3 代表输入的是原始点云,是每个点的维度(x,y,z). point_cloud 是一个Tensor,属性如下:

# point_cloud=Tensor("Placeholder:0", shape=(32, 1024, 3), dtype=float32, device=/device:GPU:0)

def input_transform_net(point_cloud, is_training, bn_decay=None, K=3):

""" Input (XYZ) Transform Net, input is BxNx3 gray image

Return:

Transformation matrix of size 3xK """

batch_size = point_cloud.get_shape()[0].value #点云的个数(一个batch包含的点云数目,pointNet 为 32)

num_point = point_cloud.get_shape()[1].value #每个点云内点的个数 (pointNet 为 1024)

input_image = tf.expand_dims(point_cloud, -1) #在point_cloud最后追加一个维度,BxNx3 变成 BxNx3x1 3d张量-->4d张量

# 输入点云point_cloud有3个axis,即B×N×3,tf.expand_dims(point_cloud, -1) 将点云最后加上一个size为1 的axis

# 作为 input_image(B×N×3×1),则input_image的channel数为1。

# net=Tensor("transform_net1/tfc1/Relu:0", shape=(x,x,x,x), dtype=float32, device=/device:GPU:0)

# 64 代表要输出的 channels (单通道变成64通道)

# [1,3]代表1行3列的矩阵,作为卷积核。将B×N×3×1转换成 B×N×1×64

# 步长:stride=[1,1] 代表滑动一个距离。决定滑动多少可以到边缘。

# padding='VALID',在原始图像上加边界(这里默认不加)

# bn: 批归一化

# is_training=is_training 设置训练模式

# bn_decay=bn_decay

net = tf_util.conv2d(input_image, 64, [1,3],

padding='VALID', stride=[1,1],

bn=True, is_training=is_training,

scope='tconv1', bn_decay=bn_decay)

# 128 代表要输出的 channels

# [1,1]代表1行1列的矩阵,作为卷积核。将B×N×1×64转换成 B×N×1×128

net = tf_util.conv2d(net, 128, [1,1],

padding='VALID', stride=[1,1],

bn=True, is_training=is_training,

scope='tconv2', bn_decay=bn_decay)

# 1024 代表要输出的 channels

# [1,1]代表1行1列的矩阵,作为卷积核。将B×N×1×128转换成 B×N×1 X 1024

net = tf_util.conv2d(net, 1024, [1, 1],

padding='VALID', stride=[1,1],

bn=True, is_training=is_training,

scope='tconv3', bn_decay=bn_decay)

#对上一步做 max_pooling 操作,将B×N×1×1024 转换成 B×1×1 X 1024

net = tf_util.max_pool2d(net, [num_point, 1], padding='VALID', scope='tmaxpool')

# 利用1024维特征生成256维度的特征向量

# 将 Bx1x1x1024变成 Bx1024

net = tf.reshape(net, [batch_size, -1])

# 将 Bx1024变成 Bx512

net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,

scope='tfc1', bn_decay=bn_decay)

# 将 Bx512变成 Bx256

net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,

scope='tfc2', bn_decay=bn_decay)

with tf.variable_scope('transform_XYZ') as sc:

assert(K==3)

# weights(wights,[256,9],dtype=tf.float32)

weights = tf.get_variable('weights', [256, 3*K],

initializer=tf.constant_initializer(0.0),

dtype=tf.float32)

#

biases = tf.get_variable('biases', [3*K],

initializer=tf.constant_initializer(0.0),

dtype=tf.float32)

#

#变成

#Tensor("transform_net1/transform_XYZ/add:0", shape=(9,), dtype=float32, device=/device:GPU:0)

biases += tf.constant([1,0,0,0,1,0,0,0,1], dtype=tf.float32)

# net = shape(32,256) weight = shape(256,9) ===> net*weight = transform(32,9)

# Tensor("transform_net1/transform_XYZ/MatMul:0", shape=(32, 9), dtype=float32, device=/device:GPU:0)

transform = tf.matmul(net, weights)

# Tensor("transform_net1/transform_XYZ/MatMul:0", shape=(32, 9), dtype=float32, device=/device:GPU:0)

# 变成

# Tensor("transform_net1/transform_XYZ/BiasAdd:0", shape=(32, 9), dtype=float32, device= / device: GPU:0)

transform = tf.nn.bias_add(transform, biases)

# 由Tensor("transform_net1/transform_XYZ/BiasAdd:0", shape=(32, 9), dtype=float32, device=/device:GPU:0)

# 变成

# Tensor("transform_net1/Reshape_1:0", shape=(32, 3, 3), dtype=float32, device=/device:GPU:0)

transform = tf.reshape(transform, [batch_size, 3, K])

return transform码片

参考阅读

评论可见,请评论后查看内容,谢谢!!!评论后请刷新页面。