From a31b359752d9d0ea1a0ea90dfbbd87d100ed0110 Mon Sep 17 00:00:00 2001
From: inter
Date: Sun, 21 Sep 2025 20:19:09 +0800
Subject: [PATCH] Add File

---
 .../pointnet2_stack/pointnet2_modules.py | 470 ++++++++++++++++++
 1 file changed, 470 insertions(+)
 create mode 100644 pcdet/ops/pointnet2/pointnet2_stack/pointnet2_modules.py

diff --git a/pcdet/ops/pointnet2/pointnet2_stack/pointnet2_modules.py b/pcdet/ops/pointnet2/pointnet2_stack/pointnet2_modules.py
new file mode 100644
index 0000000..0210ab2
--- /dev/null
+++ b/pcdet/ops/pointnet2/pointnet2_stack/pointnet2_modules.py
@@ -0,0 +1,470 @@
+from typing import List
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from . import pointnet2_utils
+
+
+def build_local_aggregation_module(input_channels, config):
+    local_aggregation_name = config.get('NAME', 'StackSAModuleMSG')
+
+    if local_aggregation_name == 'StackSAModuleMSG':
+        mlps = config.MLPS
+        # prepend the input channel count to each scale's MLP spec
+        # (note: this mutates config.MLPS in place)
+        for k in range(len(mlps)):
+            mlps[k] = [input_channels] + mlps[k]
+        cur_layer = StackSAModuleMSG(
+            radii=config.POOL_RADIUS, nsamples=config.NSAMPLE, mlps=mlps, use_xyz=True, pool_method='max_pool',
+        )
+        num_c_out = sum([x[-1] for x in mlps])
+    elif local_aggregation_name == 'VectorPoolAggregationModuleMSG':
+        cur_layer = VectorPoolAggregationModuleMSG(input_channels=input_channels, config=config)
+        num_c_out = config.MSG_POST_MLPS[-1]
+    else:
+        raise NotImplementedError
+
+    return cur_layer, num_c_out
+
+
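+# Usage sketch (illustrative, not part of the original file): the dispatcher above
+# expects a dict-style config exposing .get() plus the attributes it reads
+# (NAME, MLPS, POOL_RADIUS, NSAMPLE or MSG_POST_MLPS). The EasyDict wrapper and all
+# concrete numbers below are assumptions for demonstration only.
+#
+#   from easydict import EasyDict
+#   cfg = EasyDict({
+#       'NAME': 'StackSAModuleMSG',
+#       'MLPS': [[16, 16, 32], [32, 32, 64]],  # one MLP spec per scale
+#       'POOL_RADIUS': [0.4, 0.8],             # grouping radius per scale
+#       'NSAMPLE': [16, 32],                   # ball-query samples per scale
+#   })
+#   layer, num_c_out = build_local_aggregation_module(64, cfg)
+#   # num_c_out == 32 + 64 == 96, the concatenated last-layer widths of the two scales
+
+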
+class StackSAModuleMSG(nn.Module):
+
+    def __init__(self, *, radii: List[float], nsamples: List[int], mlps: List[List[int]],
+                 use_xyz: bool = True, pool_method='max_pool'):
+        """
+        Args:
+            radii: list of float, list of radii to group with
+            nsamples: list of int, number of samples in each ball query
+            mlps: list of list of int, spec of the pointnet before the global pooling for each scale
+            use_xyz: whether to concatenate local xyz offsets to the grouped features
+            pool_method: max_pool / avg_pool
+        """
+        super().__init__()
+
+        assert len(radii) == len(nsamples) == len(mlps)
+
+        self.groupers = nn.ModuleList()
+        self.mlps = nn.ModuleList()
+        for i in range(len(radii)):
+            radius = radii[i]
+            nsample = nsamples[i]
+            self.groupers.append(pointnet2_utils.QueryAndGroup(radius, nsample, use_xyz=use_xyz))
+            mlp_spec = mlps[i]
+            if use_xyz:
+                mlp_spec[0] += 3
+
+            shared_mlps = []
+            for k in range(len(mlp_spec) - 1):
+                shared_mlps.extend([
+                    nn.Conv2d(mlp_spec[k], mlp_spec[k + 1], kernel_size=1, bias=False),
+                    nn.BatchNorm2d(mlp_spec[k + 1]),
+                    nn.ReLU()
+                ])
+            self.mlps.append(nn.Sequential(*shared_mlps))
+        self.pool_method = pool_method
+
+        self.init_weights()
+
+    def init_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(m.weight)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            if isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1.0)
+                nn.init.constant_(m.bias, 0)
+
+    def forward(self, xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt, features=None, empty_voxel_set_zeros=True):
+        """
+        :param xyz: (N1 + N2 ..., 3) tensor of the xyz coordinates of the features
+        :param xyz_batch_cnt: (batch_size), [N1, N2, ...]
+        :param new_xyz: (M1 + M2 ..., 3)
+        :param new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
+        :param features: (N1 + N2 ..., C) tensor of the descriptors of the features
+        :param empty_voxel_set_zeros: unused here, kept for interface compatibility
+        :return:
+            new_xyz: (M1 + M2 ..., 3) tensor of the new features' xyz
+            new_features: (M1 + M2 ..., \sum_k(mlps[k][-1])) tensor of the new_features descriptors
+        """
+        new_features_list = []
+        for k in range(len(self.groupers)):
+            new_features, ball_idxs = self.groupers[k](
+                xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt, features
+            )  # (M1 + M2 ..., C, nsample)
+            new_features = new_features.permute(1, 0, 2).unsqueeze(dim=0)  # (1, C, M1 + M2 ..., nsample)
+            new_features = self.mlps[k](new_features)  # (1, C, M1 + M2 ..., nsample)
+
+            if self.pool_method == 'max_pool':
+                new_features = F.max_pool2d(
+                    new_features, kernel_size=[1, new_features.size(3)]
+                ).squeeze(dim=-1)  # (1, C, M1 + M2 ...)
+            elif self.pool_method == 'avg_pool':
+                new_features = F.avg_pool2d(
+                    new_features, kernel_size=[1, new_features.size(3)]
+                ).squeeze(dim=-1)  # (1, C, M1 + M2 ...)
+            else:
+                raise NotImplementedError
+            new_features = new_features.squeeze(dim=0).permute(1, 0)  # (M1 + M2 ..., C)
+            new_features_list.append(new_features)
+
+        new_features = torch.cat(new_features_list, dim=1)  # (M1 + M2 ..., C)
+
+        return new_xyz, new_features
+
+
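+# Usage sketch (illustrative, not part of the original file): the "stack" API takes
+# the points of all batch elements concatenated along dim 0, with per-sample counts
+# in the *_batch_cnt tensors. The shapes and the .cuda() placement below are
+# assumptions (the underlying pointnet2_utils ops are custom CUDA kernels).
+#
+#   sa = StackSAModuleMSG(radii=[0.4, 0.8], nsamples=[16, 32],
+#                         mlps=[[16, 16, 32], [16, 16, 32]], use_xyz=True).cuda()
+#   xyz = torch.randn(2048 + 1024, 3).cuda()                 # two point clouds, stacked
+#   xyz_batch_cnt = torch.tensor([2048, 1024]).int().cuda()  # points per sample
+#   new_xyz = torch.randn(512 + 256, 3).cuda()               # sampled centroids, stacked
+#   new_xyz_batch_cnt = torch.tensor([512, 256]).int().cuda()
+#   features = torch.randn(2048 + 1024, 16).cuda()           # 16 == mlps[k][0]
+#   new_xyz, new_features = sa(xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt, features)
+#   # new_features: (512 + 256, 32 + 32) after concatenating both scales
+
+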
+class StackPointnetFPModule(nn.Module):
+    def __init__(self, *, mlp: List[int]):
+        """
+        Args:
+            mlp: list of int
+        """
+        super().__init__()
+        shared_mlps = []
+        for k in range(len(mlp) - 1):
+            shared_mlps.extend([
+                nn.Conv2d(mlp[k], mlp[k + 1], kernel_size=1, bias=False),
+                nn.BatchNorm2d(mlp[k + 1]),
+                nn.ReLU()
+            ])
+        self.mlp = nn.Sequential(*shared_mlps)
+
+    def forward(self, unknown, unknown_batch_cnt, known, known_batch_cnt, unknown_feats=None, known_feats=None):
+        """
+        Args:
+            unknown: (N1 + N2 ..., 3)
+            unknown_batch_cnt: (batch_size), [N1, N2, ...]
+            known: (M1 + M2 ..., 3)
+            known_batch_cnt: (batch_size), [M1, M2, ...]
+            unknown_feats: (N1 + N2 ..., C1)
+            known_feats: (M1 + M2 ..., C2)
+
+        Returns:
+            new_features: (N1 + N2 ..., C_out)
+        """
+        dist, idx = pointnet2_utils.three_nn(unknown, unknown_batch_cnt, known, known_batch_cnt)
+        dist_recip = 1.0 / (dist + 1e-8)
+        norm = torch.sum(dist_recip, dim=-1, keepdim=True)
+        weight = dist_recip / norm
+
+        interpolated_feats = pointnet2_utils.three_interpolate(known_feats, idx, weight)
+
+        if unknown_feats is not None:
+            new_features = torch.cat([interpolated_feats, unknown_feats], dim=1)  # (N1 + N2 ..., C2 + C1)
+        else:
+            new_features = interpolated_feats
+        new_features = new_features.permute(1, 0)[None, :, :, None]  # (1, C, N1 + N2 ..., 1)
+        new_features = self.mlp(new_features)
+
+        new_features = new_features.squeeze(dim=0).squeeze(dim=-1).permute(1, 0)  # (N1 + N2 ..., C)
+        return new_features
+
+
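+# The propagation above uses standard inverse-distance weighting over the three
+# nearest known points: w_i = (1 / (d_i + 1e-8)) / sum_j (1 / (d_j + 1e-8)).
+# Worked micro-example (assumed distances): d = [0.1, 0.2, 0.4] gives reciprocals
+# [10, 5, 2.5] and weights [0.571, 0.286, 0.143], so the closest known point
+# dominates the interpolated feature.
+
+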
+class VectorPoolLocalInterpolateModule(nn.Module):
+    def __init__(self, mlp, num_voxels, max_neighbour_distance, nsample, neighbor_type, use_xyz=True,
+                 neighbour_distance_multiplier=1.0, xyz_encoding_type='concat'):
+        """
+        Args:
+            mlp: list of int, channel spec of the shared MLP (None to skip the MLP)
+            num_voxels: [num_grid_x, num_grid_y, num_grid_z], grids per local area
+            max_neighbour_distance: float, radius of the local area
+            nsample: find all (-1), find limited number (>0)
+            neighbor_type: 1: ball, others: cube
+            use_xyz: whether to encode local xyz offsets into the features
+            neighbour_distance_multiplier: multiplier on the search radius for neighbor lookup
+            xyz_encoding_type: how local xyz offsets are encoded ('concat' only)
+        """
+        super().__init__()
+        self.num_voxels = num_voxels  # [num_grid_x, num_grid_y, num_grid_z]: number of grids in each local area centered at new_xyz
+        self.num_total_grids = self.num_voxels[0] * self.num_voxels[1] * self.num_voxels[2]
+        self.max_neighbour_distance = max_neighbour_distance
+        self.neighbor_distance_multiplier = neighbour_distance_multiplier
+        self.nsample = nsample
+        self.neighbor_type = neighbor_type
+        self.use_xyz = use_xyz
+        self.xyz_encoding_type = xyz_encoding_type
+
+        if mlp is not None:
+            if self.use_xyz:
+                mlp[0] += 9 if self.xyz_encoding_type == 'concat' else 0
+            shared_mlps = []
+            for k in range(len(mlp) - 1):
+                shared_mlps.extend([
+                    nn.Conv2d(mlp[k], mlp[k + 1], kernel_size=1, bias=False),
+                    nn.BatchNorm2d(mlp[k + 1]),
+                    nn.ReLU()
+                ])
+            self.mlp = nn.Sequential(*shared_mlps)
+        else:
+            self.mlp = None
+
+        self.num_avg_length_of_neighbor_idxs = 1000
+
+    def forward(self, support_xyz, support_features, xyz_batch_cnt, new_xyz, new_xyz_grid_centers, new_xyz_batch_cnt):
+        """
+        Args:
+            support_xyz: (N1 + N2 ..., 3) xyz coordinates of the features
+            support_features: (N1 + N2 ..., C) point-wise features
+            xyz_batch_cnt: (batch_size), [N1, N2, ...]
+            new_xyz: (M1 + M2 ..., 3) centers of the ball query
+            new_xyz_grid_centers: (M1 + M2 ..., num_total_grids, 3) grid centers within each local area
+            new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
+        Returns:
+            new_features: ((M1 + M2 ...) * num_total_grids, C_out)
+        """
+        with torch.no_grad():
+            dist, idx, num_avg_length_of_neighbor_idxs = pointnet2_utils.three_nn_for_vector_pool_by_two_step(
+                support_xyz, xyz_batch_cnt, new_xyz, new_xyz_grid_centers, new_xyz_batch_cnt,
+                self.max_neighbour_distance, self.nsample, self.neighbor_type,
+                self.num_avg_length_of_neighbor_idxs, self.num_total_grids, self.neighbor_distance_multiplier
+            )
+        self.num_avg_length_of_neighbor_idxs = max(self.num_avg_length_of_neighbor_idxs, num_avg_length_of_neighbor_idxs.item())
+
+        dist_recip = 1.0 / (dist + 1e-8)
+        norm = torch.sum(dist_recip, dim=-1, keepdim=True)
+        weight = dist_recip / torch.clamp_min(norm, min=1e-8)
+
+        empty_mask = (idx.view(-1, 3)[:, 0] == -1)
+        idx.view(-1, 3)[empty_mask] = 0
+
+        interpolated_feats = pointnet2_utils.three_interpolate(support_features, idx.view(-1, 3), weight.view(-1, 3))
+        interpolated_feats = interpolated_feats.view(idx.shape[0], idx.shape[1], -1)  # (M1 + M2 ..., num_total_grids, C)
+        if self.use_xyz:
+            near_known_xyz = support_xyz[idx.view(-1, 3).long()].view(-1, 3, 3)  # ((M1 + M2 ...) * num_total_grids, 3, 3)
+            local_xyz = (new_xyz_grid_centers.view(-1, 1, 3) - near_known_xyz).view(-1, idx.shape[1], 9)
+            if self.xyz_encoding_type == 'concat':
+                interpolated_feats = torch.cat((interpolated_feats, local_xyz), dim=-1)  # (M1 + M2 ..., num_total_grids, 9 + C)
+            else:
+                raise NotImplementedError
+
+        new_features = interpolated_feats.view(-1, interpolated_feats.shape[-1])  # ((M1 + M2 ...) * num_total_grids, C)
+        new_features[empty_mask, :] = 0
+        if self.mlp is not None:
+            new_features = new_features.permute(1, 0)[None, :, :, None]  # (1, C, (M1 + M2 ...) * num_total_grids, 1)
+            new_features = self.mlp(new_features)
+
+            new_features = new_features.squeeze(dim=0).squeeze(dim=-1).permute(1, 0)  # ((M1 + M2 ...) * num_total_grids, C)
+        return new_features
+
+
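+# Shape note: with num_voxels=[3, 3, 3] this module interpolates one feature vector
+# for each of the 27 grid centres of every query point, so M query points yield a
+# ((M * 27), C_out) tensor; callers such as vector_pool_with_local_interpolate below
+# reshape it back to (M, 27 * C_out).
+
+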
+class VectorPoolAggregationModule(nn.Module):
+    def __init__(
+            self, input_channels, num_local_voxel=(3, 3, 3), local_aggregation_type='local_interpolation',
+            num_reduced_channels=30, num_channels_of_local_aggregation=32, post_mlps=(128,),
+            max_neighbor_distance=None, neighbor_nsample=-1, neighbor_type=0, neighbor_distance_multiplier=2.0):
+        super().__init__()
+        self.num_local_voxel = num_local_voxel
+        self.total_voxels = self.num_local_voxel[0] * self.num_local_voxel[1] * self.num_local_voxel[2]
+        self.local_aggregation_type = local_aggregation_type
+        assert self.local_aggregation_type in ['local_interpolation', 'voxel_avg_pool', 'voxel_random_choice']
+        self.input_channels = input_channels
+        self.num_reduced_channels = input_channels if num_reduced_channels is None else num_reduced_channels
+        self.num_channels_of_local_aggregation = num_channels_of_local_aggregation
+        self.max_neighbour_distance = max_neighbor_distance
+        self.neighbor_nsample = neighbor_nsample
+        self.neighbor_type = neighbor_type  # 1: ball, others: cube
+
+        if self.local_aggregation_type == 'local_interpolation':
+            self.local_interpolate_module = VectorPoolLocalInterpolateModule(
+                mlp=None, num_voxels=self.num_local_voxel,
+                max_neighbour_distance=self.max_neighbour_distance,
+                nsample=self.neighbor_nsample,
+                neighbor_type=self.neighbor_type,
+                neighbour_distance_multiplier=neighbor_distance_multiplier,
+            )
+            num_c_in = (self.num_reduced_channels + 9) * self.total_voxels
+        else:
+            self.local_interpolate_module = None
+            num_c_in = (self.num_reduced_channels + 3) * self.total_voxels
+
+        num_c_out = self.total_voxels * self.num_channels_of_local_aggregation
+
+        self.separate_local_aggregation_layer = nn.Sequential(
+            nn.Conv1d(num_c_in, num_c_out, kernel_size=1, groups=self.total_voxels, bias=False),
+            nn.BatchNorm1d(num_c_out),
+            nn.ReLU()
+        )
+
+        post_mlp_list = []
+        c_in = num_c_out
+        for cur_num_c in post_mlps:
+            post_mlp_list.extend([
+                nn.Conv1d(c_in, cur_num_c, kernel_size=1, bias=False),
+                nn.BatchNorm1d(cur_num_c),
+                nn.ReLU()
+            ])
+            c_in = cur_num_c
+        self.post_mlps = nn.Sequential(*post_mlp_list)
+
+        self.num_mean_points_per_grid = 20
+        self.init_weights()
+
+    def init_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d):
+                nn.init.kaiming_normal_(m.weight)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            if isinstance(m, nn.BatchNorm2d) or isinstance(m, nn.BatchNorm1d):
+                nn.init.constant_(m.weight, 1.0)
+                nn.init.constant_(m.bias, 0)
+
+    def extra_repr(self) -> str:
+        ret = f'radius={self.max_neighbour_distance}, local_voxels={self.num_local_voxel}, ' \
+              f'local_aggregation_type={self.local_aggregation_type}, ' \
+              f'num_c_reduction={self.input_channels}->{self.num_reduced_channels}, ' \
+              f'num_c_local_aggregation={self.num_channels_of_local_aggregation}'
+        return ret
+
+    def vector_pool_with_voxel_query(self, xyz, xyz_batch_cnt, features, new_xyz, new_xyz_batch_cnt):
+        use_xyz = 1
+        pooling_type = 0 if self.local_aggregation_type == 'voxel_avg_pool' else 1
+
+        new_features, new_local_xyz, num_mean_points_per_grid, point_cnt_of_grid = pointnet2_utils.vector_pool_with_voxel_query_op(
+            xyz, xyz_batch_cnt, features, new_xyz, new_xyz_batch_cnt,
+            self.num_local_voxel[0], self.num_local_voxel[1], self.num_local_voxel[2],
+            self.max_neighbour_distance, self.num_reduced_channels, use_xyz,
+            self.num_mean_points_per_grid, self.neighbor_nsample, self.neighbor_type,
+            pooling_type
+        )
+        self.num_mean_points_per_grid = max(self.num_mean_points_per_grid, num_mean_points_per_grid.item())
+
+        num_new_pts = new_features.shape[0]
+        new_local_xyz = new_local_xyz.view(num_new_pts, -1, 3)  # (N, num_voxel, 3)
+        new_features = new_features.view(num_new_pts, -1, self.num_reduced_channels)  # (N, num_voxel, C)
+        new_features = torch.cat((new_local_xyz, new_features), dim=-1).view(num_new_pts, -1)
+
+        return new_features, point_cnt_of_grid
+
+    @staticmethod
+    def get_dense_voxels_by_center(point_centers, max_neighbour_distance, num_voxels):
+        """
+        Args:
+            point_centers: (N, 3)
+            max_neighbour_distance: float
+            num_voxels: [num_x, num_y, num_z]
+
+        Returns:
+            voxel_centers: (N, total_voxels, 3)
+        """
+        R = max_neighbour_distance
+        device = point_centers.device
+        x_grids = torch.arange(-R + R / num_voxels[0], R - R / num_voxels[0] + 1e-5, 2 * R / num_voxels[0], device=device)
+        y_grids = torch.arange(-R + R / num_voxels[1], R - R / num_voxels[1] + 1e-5, 2 * R / num_voxels[1], device=device)
+        z_grids = torch.arange(-R + R / num_voxels[2], R - R / num_voxels[2] + 1e-5, 2 * R / num_voxels[2], device=device)
+        x_offset, y_offset, z_offset = torch.meshgrid(x_grids, y_grids, z_grids)  # default 'ij' indexing; shape: [num_x, num_y, num_z]
+        xyz_offset = torch.cat((
+            x_offset.contiguous().view(-1, 1),
+            y_offset.contiguous().view(-1, 1),
+            z_offset.contiguous().view(-1, 1)), dim=-1
+        )
+        voxel_centers = point_centers[:, None, :] + xyz_offset[None, :, :]
+        return voxel_centers
+
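+    # Worked micro-example (assumed numbers): with max_neighbour_distance R = 1.0 and
+    # num_voxels = [2, 2, 2], each axis yields offsets [-0.5, 0.5], so every centre is
+    # expanded to the 8 corners of a 2x2x2 grid and voxel_centers has shape (N, 8, 3).
+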
+    def vector_pool_with_local_interpolate(self, xyz, xyz_batch_cnt, features, new_xyz, new_xyz_batch_cnt):
+        """
+        Args:
+            xyz: (N, 3)
+            xyz_batch_cnt: (batch_size)
+            features: (N, C)
+            new_xyz: (M, 3)
+            new_xyz_batch_cnt: (batch_size)
+        Returns:
+            new_features: (M, total_voxels * C)
+        """
+        voxel_centers = self.get_dense_voxels_by_center(
+            point_centers=new_xyz, max_neighbour_distance=self.max_neighbour_distance, num_voxels=self.num_local_voxel
+        )  # (M1 + M2 + ..., total_voxels, 3)
+        voxel_features = self.local_interpolate_module.forward(
+            support_xyz=xyz, support_features=features, xyz_batch_cnt=xyz_batch_cnt,
+            new_xyz=new_xyz, new_xyz_grid_centers=voxel_centers, new_xyz_batch_cnt=new_xyz_batch_cnt
+        )  # ((M1 + M2 ...) * total_voxels, C)
+
+        voxel_features = voxel_features.contiguous().view(-1, self.total_voxels * voxel_features.shape[-1])
+        return voxel_features
+
+    def forward(self, xyz, xyz_batch_cnt, new_xyz, new_xyz_batch_cnt, features, **kwargs):
+        """
+        :param xyz: (N1 + N2 ..., 3) tensor of the xyz coordinates of the features
+        :param xyz_batch_cnt: (batch_size), [N1, N2, ...]
+        :param new_xyz: (M1 + M2 ..., 3)
+        :param new_xyz_batch_cnt: (batch_size), [M1, M2, ...]
+        :param features: (N1 + N2 ..., C) tensor of the descriptors of the features
+        :return:
+            new_xyz: (M1 + M2 ..., 3) tensor of the new features' xyz
+            new_features: (M1 + M2 ..., post_mlps[-1]) tensor of the new_features descriptors
+        """
+        N, C = features.shape
+
+        assert C % self.num_reduced_channels == 0, \
+            f'the input channels ({C}) should be an integral multiple of num_reduced_channels ({self.num_reduced_channels})'
+
+        # reduce channels by summing consecutive groups of num_reduced_channels
+        features = features.view(N, -1, self.num_reduced_channels).sum(dim=1)
+
+        if self.local_aggregation_type in ['voxel_avg_pool', 'voxel_random_choice']:
+            vector_features, point_cnt_of_grid = self.vector_pool_with_voxel_query(
+                xyz=xyz, xyz_batch_cnt=xyz_batch_cnt, features=features,
+                new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt
+            )
+        elif self.local_aggregation_type == 'local_interpolation':
+            vector_features = self.vector_pool_with_local_interpolate(
+                xyz=xyz, xyz_batch_cnt=xyz_batch_cnt, features=features,
+                new_xyz=new_xyz, new_xyz_batch_cnt=new_xyz_batch_cnt
+            )  # (M1 + M2 + ..., total_voxels * C)
+        else:
+            raise NotImplementedError
+
+        vector_features = vector_features.permute(1, 0)[None, :, :]  # (1, num_voxels * C, M1 + M2 ...)
+
+        new_features = self.separate_local_aggregation_layer(vector_features)
+
+        new_features = self.post_mlps(new_features)
+        new_features = new_features.squeeze(dim=0).permute(1, 0)
+        return new_xyz, new_features
+
+
+class VectorPoolAggregationModuleMSG(nn.Module):
+    def __init__(self, input_channels, config):
+        super().__init__()
+        self.model_cfg = config
+        self.num_groups = self.model_cfg.NUM_GROUPS
+
+        self.layers = []  # note: unused; submodules are registered as attributes layer_{k} below
+        c_in = 0
+        for k in range(self.num_groups):
+            cur_config = self.model_cfg[f'GROUP_CFG_{k}']
+            cur_vector_pool_module = VectorPoolAggregationModule(
+                input_channels=input_channels, num_local_voxel=cur_config.NUM_LOCAL_VOXEL,
+                post_mlps=cur_config.POST_MLPS,
+                max_neighbor_distance=cur_config.MAX_NEIGHBOR_DISTANCE,
+                neighbor_nsample=cur_config.NEIGHBOR_NSAMPLE,
+                local_aggregation_type=self.model_cfg.LOCAL_AGGREGATION_TYPE,
+                num_reduced_channels=self.model_cfg.get('NUM_REDUCED_CHANNELS', None),
+                num_channels_of_local_aggregation=self.model_cfg.NUM_CHANNELS_OF_LOCAL_AGGREGATION,
+                neighbor_distance_multiplier=2.0
+            )
+            self.__setattr__(f'layer_{k}', cur_vector_pool_module)
+            c_in += cur_config.POST_MLPS[-1]
+
+        c_in += 3  # use_xyz
+
+        shared_mlps = []
+        for cur_num_c in self.model_cfg.MSG_POST_MLPS:
+            shared_mlps.extend([
+                nn.Conv1d(c_in, cur_num_c, kernel_size=1, bias=False),
+                nn.BatchNorm1d(cur_num_c),
+                nn.ReLU()
+            ])
+            c_in = cur_num_c
+        self.msg_post_mlps = nn.Sequential(*shared_mlps)
+
+    def forward(self, **kwargs):
+        features_list = []
+        for k in range(self.num_groups):
+            cur_xyz, cur_features = self.__getattr__(f'layer_{k}')(**kwargs)
+            features_list.append(cur_features)
+
+        features = torch.cat(features_list, dim=-1)
+        features = torch.cat((cur_xyz, features), dim=-1)
+        features = features.permute(1, 0)[None, :, :]  # (1, C, N)
+        new_features = self.msg_post_mlps(features)
+        new_features = new_features.squeeze(dim=0).permute(1, 0)  # (N, C)
+
+        return cur_xyz, new_features
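+
+
+# Usage sketch (illustrative, not part of the original file): the MSG module reads one
+# sub-config per group, named GROUP_CFG_0 ... GROUP_CFG_{NUM_GROUPS-1}. The EasyDict
+# wrapper and every number below are assumptions for demonstration only.
+#
+#   from easydict import EasyDict
+#   cfg = EasyDict({
+#       'NAME': 'VectorPoolAggregationModuleMSG',
+#       'NUM_GROUPS': 2,
+#       'LOCAL_AGGREGATION_TYPE': 'local_interpolation',
+#       'NUM_REDUCED_CHANNELS': 2,
+#       'NUM_CHANNELS_OF_LOCAL_AGGREGATION': 32,
+#       'MSG_POST_MLPS': [128],
+#       'GROUP_CFG_0': EasyDict({'NUM_LOCAL_VOXEL': [2, 2, 2], 'POST_MLPS': [32, 32],
+#                                'MAX_NEIGHBOR_DISTANCE': 0.2, 'NEIGHBOR_NSAMPLE': -1}),
+#       'GROUP_CFG_1': EasyDict({'NUM_LOCAL_VOXEL': [3, 3, 3], 'POST_MLPS': [32, 32],
+#                                'MAX_NEIGHBOR_DISTANCE': 0.4, 'NEIGHBOR_NSAMPLE': -1}),
+#   })
+#   layer, num_c_out = build_local_aggregation_module(2, cfg)
+#   # num_c_out == cfg.MSG_POST_MLPS[-1] == 128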