Source code for xrmocap.utils.camera_utils

import numpy as np
import torch


def unfold_camera_param(camera: dict):
    """Extract extrinsic, intrinsic and distortion parameters from a camera
    dictionary.

    The code path is selected by ``camera['R']``: an object without a
    ``device`` attribute (e.g. ``np.ndarray``) takes the NumPy path,
    anything else is handled as a ``torch.Tensor``.

    Args:
        camera (dict):
            Camera parameters stored under the keys ``'R'``, ``'T'``,
            ``'K'`` and ``'dist_coeff'``. Entries 0, 1 and 4 of
            ``dist_coeff`` are returned as radial coefficients and
            entries 2 and 3 as tangential coefficients. Tensor inputs
            may describe a single camera (``R`` is 3x3 or 1x3x3) or a
            batch laid out as [bs, n_views, ...].

    Returns:
        R (Union[np.ndarray, torch.Tensor]):
            Extrinsic parameters, rotation matrix.
        T (Union[np.ndarray, torch.Tensor]):
            Extrinsic parameters, translation.
        f (Union[np.ndarray, torch.Tensor]):
            Focal length in x, y direction, shape [..., 2, 1].
        c (Union[np.ndarray, torch.Tensor]):
            Camera center, shape [..., 2, 1].
        k (Union[np.ndarray, torch.Tensor]):
            Radial distortion coefficients.
        p (Union[np.ndarray, torch.Tensor]):
            Tangential distortion coefficients.

    Raises:
        ValueError: If ``R`` is a tensor whose number of dimensions is
            not 2, 3 or 4.
    """
    R = camera['R']
    T = camera['T']
    K = camera['K']
    dist_coeff = camera['dist_coeff']

    if not hasattr(R, 'device'):
        # NumPy path: parameters are passed through untouched, only the
        # intrinsics are repacked into 2x1 columns.
        f = np.array([[K[0, 0]], [K[1, 1]]]).reshape(-1, 1)
        c = np.array([[K[0, 2]], [K[1, 2]]]).reshape(-1, 1)
        k = dist_coeff[[0, 1, 4]]
        p = dist_coeff[[2, 3]]
        return R, T, f, c, k, p

    device = R.device
    R = torch.as_tensor(R, dtype=torch.float, device=device)
    T = torch.as_tensor(T, dtype=torch.float, device=device)
    K = torch.as_tensor(K, dtype=torch.float, device=device)
    dist_coeff = torch.as_tensor(dist_coeff, dtype=torch.float, device=device)

    if R.ndim in (2, 3):
        # Single camera, either un-batched or with a leading axis of size 1;
        # both collapse to the same canonical shapes.
        R = R.reshape(3, 3)
        T = T.reshape(3, 1)
        K = K.reshape(3, 3)
        # -1 instead of a fixed length so that any coefficient vector with
        # at least five entries is accepted.
        dist_coeff = dist_coeff.reshape(-1, 1)
        k = dist_coeff[[0, 1, 4]]
        p = dist_coeff[[2, 3]]
        # torch.stack builds the result directly on `device`; the values are
        # returned detached from the autograd graph.
        f = torch.stack([K[0, 0], K[1, 1]]).detach().reshape(2, 1)
        c = torch.stack([K[0, 2], K[1, 2]]).detach().reshape(2, 1)
        return R, T, f, c, k, p

    if R.ndim == 4:
        # [bs, n_views, (cam_param)]
        k = dist_coeff[:, :, [0, 1, 4], None]
        p = dist_coeff[:, :, [2, 3], None]
        # f is detached (it used to be copy-constructed with torch.tensor);
        # c keeps whatever autograd link K has.
        f = torch.stack([K[:, :, 0, 0], K[:, :, 1, 1]],
                        dim=2).detach().unsqueeze(-1)
        c = torch.stack([K[:, :, 0, 2], K[:, :, 1, 2]], dim=2).unsqueeze(-1)
        return R, T, f, c, k, p

    raise ValueError(f'Invalid camera parameter shape: {R.shape}')
def project_point_radial(x, R, T, f, c, k, p):
    """Project 3D world points to 2D pixel coordinates using a camera model
    with radial and tangential lens distortion.

    The code path is selected by ``x``: an object without a ``device``
    attribute (e.g. ``np.ndarray``) takes the NumPy path, anything else is
    handled as a ``torch.Tensor``.

    Args:
        x: Points in world coordinates, Nx3, or
            [bs, n_view, n_bins, 3] for batched tensors.
        R: 3x3 camera rotation matrix ([bs, n_view, 3, 3] when batched).
        T: 3x1 camera translation ([bs, n_view, 3, 1] when batched).
        f: 2x1 focal lengths in x and y ([bs, n_view, 2, 1] when batched).
        c: 2x1 camera center ([bs, n_view, 2, 1] when batched).
        k: 3x1 radial distortion coefficients
            ([bs, n_view, 3, 1] when batched).
        p: 2x1 tangential distortion coefficients
            ([bs, n_view, 2, 1] when batched).

    Returns:
        Points in pixel space, Nx2, or [bs, n_view, n_bins, 2] for batched
        tensors.

    Raises:
        ValueError: If ``x`` is a tensor whose number of dimensions is
            neither 2 nor 4.
    """
    if not hasattr(x, 'device'):
        n = x.shape[0]
        # World -> camera frame, then perspective division. The small
        # epsilon keeps the division finite for points with zero depth.
        x_cam = R.dot(x.T - T)
        y = x_cam[:2] / (x_cam[2] + 1e-5)
        r2 = np.sum(y**2, axis=0)
        # radial = 1 + k[0] * r^2 + k[1] * r^4 + k[2] * r^6
        radial = 1 + np.einsum('ij,ij->j', np.tile(k, (1, n)),
                               np.array([r2, r2**2, r2**3]))
        tan = p[0] * y[1] + p[1] * y[0]
        y = y * np.tile(radial + 2 * tan, (2, 1)) + np.outer(
            np.array([p[1], p[0]]).reshape(-1), r2)
        ypixel = np.multiply(f, y) + c
        return ypixel.T

    if x.ndim not in (2, 4):
        # Fail loudly instead of silently returning None.
        raise ValueError(f'Invalid point shape: {x.shape}')

    if x.ndim == 2:
        n = x.shape[0]
        x_cam = torch.mm(R, torch.t(x) - T)
        y = x_cam[:2] / (x_cam[2] + 1e-5)
        r2 = torch.sum(y**2, 0, keepdim=True)
        r2exp = torch.cat([r2, r2**2, r2**3], 0)
        radial = 1 + torch.einsum('ij,ij->j', k.repeat((1, n)), r2exp)
        tan = p[0] * y[1] + p[1] * y[0]
        corr = (radial + 2 * tan).repeat((2, 1))
        # Outer product of the swapped tangential coefficients with r^2,
        # written as a broadcast so it does not rely on the deprecated
        # torch.ger alias.
        p_swapped = torch.cat([p[1], p[0]]).view(-1, 1)
        y = y * corr + p_swapped * r2.view(1, -1)
        ypixel = (f * y) + c
        return torch.t(ypixel)

    # x.ndim == 4: [bs, n_view, n_bins, 3]
    n_bins = x.shape[2]
    x_cam = torch.matmul(R, x.transpose(2, 3) - T)
    y = x_cam[:, :, :2] / (x_cam[:, :, 2:] + 1e-5)
    r2 = torch.sum(y**2, 2, keepdim=True)
    r2exp = torch.cat([r2, r2**2, r2**3], 2)
    radial = 1 + torch.einsum('bvij,bvij->bvj', k.repeat(1, 1, 1, n_bins),
                              r2exp)
    tan = p[:, :, 0] * y[:, :, 1] + p[:, :, 1] * y[:, :, 0]
    corr = (radial + 2 * tan).unsqueeze(2).expand(-1, -1, 2, -1)
    p_swapped = torch.stack([p[:, :, 1], p[:, :, 0]], dim=2)
    y = y * corr + torch.matmul(p_swapped, r2)
    ypixel = (f * y) + c
    return ypixel.transpose(2, 3)
def project_pose(x, camera):
    """Project 3D points to 2D pixel space with a camera dictionary.

    Args:
        x: 3D points, forwarded unchanged to ``project_point_radial``.
        camera (dict): Camera parameters, unpacked by
            ``unfold_camera_param``.

    Returns:
        The value returned by ``project_point_radial`` for ``x`` and the
        unpacked camera parameters.
    """
    # unfold_camera_param yields (R, T, f, c, k, p), which is exactly the
    # positional order project_point_radial expects after x.
    camera_params = unfold_camera_param(camera)
    return project_point_radial(x, *camera_params)