Source code for vista.entities.sensors.camera_utils.ViewSynthesis

import os
from typing import Dict, Optional, Tuple, List
from enum import Enum
import numpy as np
import pyrender
import copy

from . import CameraParams
from ....utils import transform, logging, misc

ZNEAR = 0.01
ZFAR = 10000


class DepthModes(Enum):
    FIXED_PLANE = 1
    INPUT_DISP = 2
    MONODEPTH = 3


[docs]class ViewSynthesis: """ A RGB synthesizer that simulates RGB image at novel viewpoint around a pre-collected RGB image/video dataset. Conceptually, it (1) projects a reference 2D RGB image to 3D colored mesh using camera projection matrices and (approximated) depth, (2) place virtual objects in the scene, (3) render RGB image at novel viewpoint. Args: camera_param (CameraParams): Camera parameter object of the virtual camera. config (Dict): Configuration of the synthesizer. init_with_bg_mesh (bool): Whether to initialize with background mesh; default is set to ``True``. """ DEFAULT_CONFIG = { 'depth_mode': DepthModes.FIXED_PLANE, 'znear': ZNEAR, 'zfar': ZFAR, 'use_lighting': False, 'recoloring_factor': 0.5, } def __init__(self, camera_param: CameraParams, config: Dict, init_with_bg_mesh: Optional[bool] = True) -> None: # Parse configuration self._camera_param = camera_param config = misc.merge_dict(config, self.DEFAULT_CONFIG) self._config = config if isinstance(self._config['depth_mode'], str): self._config['depth_mode'] = getattr(DepthModes, self._config['depth_mode']) # Renderer and scene self._renderer = pyrender.OffscreenRenderer( self._camera_param.get_width(), self._camera_param.get_height()) self._scene = pyrender.Scene(ambient_light=[1., 1., 1.], bg_color=[0, 0, 0]) # Camera for rendering cam_w = camera_param.get_width() cam_h = camera_param.get_height() camera = pyrender.IntrinsicsCamera( fx=self._camera_param._fx, fy=self._camera_param._fy, cx=cam_w / 2., # NOTE: assume calibrated camera cy=cam_h / 2., znear=self._config['znear'], zfar=self._config['zfar']) self._camera_node = pyrender.Node( name='camera', camera=camera, translation=self._camera_param.get_position()[:, 0], rotation=self._camera_param.get_quaternion()[:, 0]) self._scene.add_node(self._camera_node) # Mesh of background. Can add more by calling add_bg_mesh for different camera_param self._world_rays: Dict[str, np.ndarray] = dict() if self._config['depth_mode'] == DepthModes.FIXED_PLANE: self._depth: Dict[str, np.ndarray] = dict() self._bg_node: Dict[str, pyrender.Node] = dict() if init_with_bg_mesh: self.add_bg_mesh(self._camera_param) # Scene for non-background objects and object nodes self._object_scene = pyrender.Scene(ambient_light=[1., 1., 1.], bg_color=[0, 0, 0]) self._object_nodes = dict()
[docs] def synthesize( self, trans: np.ndarray, rot: np.ndarray, imgs: Dict[str, np.ndarray], depth: Optional[Dict[str, np.ndarray]] = None ) -> Tuple[np.ndarray, np.ndarray]: """ Synthesize RGB image at the novel viewpoint specified by ``trans`` and ``rot`` with respect to the nominal viewpoint that corresponds to a set of RGB images and depth maps. Args: trans (np.ndarray): Translation vector. rot (np.ndarray): Rotation vector in Euler angle. imgs (Dict[str, np.ndarray]): A set of images (potentially from multiple camera). depth (Dict[str, np.ndarray]): A set of depth maps corresponding to ``imgs``. Returns: Returns a tuple (``array_1``, ``array_2``), where ``array_1`` is the synthesized RGB image and ``array_2`` is the corresponding depth image. """ for name, img in imgs.items(): # Need this otherwise will cause memory leak if os.environ.get('PYOPENGL_PLATFORM', None) != 'egl': self._bg_node[name].mesh = copy.deepcopy( self._bg_node[name].mesh) # Refresh meshes in renderer; otherwise mesh vertex/color won't update mesh = self._bg_node[name].mesh for prim in mesh.primitives: prim._unbind() prim._remove_from_context() if mesh in self._renderer._renderer._meshes: self._renderer._renderer._meshes.remove(mesh) # Update mesh vertex positions (this won't affect node-level translation and rotation) if self._config['depth_mode'] == DepthModes.FIXED_PLANE: depth = self._depth[name] else: raise NotImplementedError depth = depth.reshape((1, -1)) world_coords = np.multiply(-depth, self._world_rays[name]) mesh.primitives[0].positions = world_coords.T # Update mesh face colors colors = img[:, ::-1] / 255. mesh.primitives[0].color_0[:, :3] = colors.reshape((-1, 3)) # Update camera pose based on the requested viewpoint (with camera # relative pose to the car as offset) trans += self._camera_param.get_position()[:, 0] rot += transform.quat2euler(self._camera_param.get_quaternion()[:, 0]) self._camera_node.matrix = transform.vec2mat(trans, rot) # Render background color_bg, _ = self._renderer.render( self._scene, flags=pyrender.constants.RenderFlags.FLAT) # Render scene object self._object_scene.clear() light = pyrender.DirectionalLight( [255, 255, 255], self.config['directional_light_intensity']) self._object_scene.add(light) self._object_scene.add_node(self._camera_node) for name, object_node in self._object_nodes.items(): self._object_scene.add_node(object_node) color_objects, depth_objects = self._renderer.render( self._object_scene) color_objects = color_objects[:, :, :: -1] # TODO: not sure why we need this # Overlay mask = (depth_objects != 0.).astype(np.uint8)[:, :, None] if mask.sum() != 0: # recoloring color_bg_mean = color_bg.mean(0).mean(0) color_objects_mean = (color_objects * mask).sum(0).sum(0) / mask.sum() color_objects = color_objects + ( color_bg_mean - color_objects_mean) * self.config['recoloring_factor'] color_objects = np.clip(color_objects, 0, 255).astype(np.uint8) else: # agent out-of-view pass color = (1 - mask) * color_bg + mask * color_objects return color, depth
[docs] def update_object_node(self, name: str, mesh: pyrender.Mesh, trans: np.ndarray, quat: np.ndarray) -> None: """ Update the virtual object in the scene. Args: name (str): Name of the virtual object. mesh (pyrender.Mesh): Mesh of the virtual object. trans (np.ndarray): Translation vector. quat (np.ndarray): Quaternion vector. """ self._object_nodes[name] = pyrender.Node(name=name, mesh=mesh, translation=trans, rotation=quat)
[docs] def add_bg_mesh(self, camera_param: CameraParams) -> None: """ Add background mesh to the scene based on camera projection and the initial depth. The color of the mesh will be updated at every ``synthesize`` call and if not using ground-plane depth approximation, the geometry of the mesh will be also updated. Args: camera_param (CameraParams): Camera parameter of the virtual camera. """ # Projection and re-projection parameters K = camera_param.get_K().copy() logging.debug('Hacky way to handle unrectified image') cam_w = camera_param.get_width() cam_h = camera_param.get_height() K[0, 2] = cam_w / 2. K[1, 2] = cam_h / 2. K_inv = np.linalg.inv(K) # Mesh coordinates, faces, and rays name = camera_param.name homo_coords, mesh_faces = self._get_homogeneous_image_coords( camera_param, get_mesh=True) self._world_rays[name] = np.matmul(K_inv, homo_coords) # Get depth for ground plane assumption if self._config['depth_mode'] == DepthModes.FIXED_PLANE: normal = np.reshape(camera_param.get_ground_plane()[:3], [1, 3]) d = camera_param.get_ground_plane()[3] k = np.divide(d, np.matmul(normal, self._world_rays[name])) k[k < 0] = self._config[ 'zfar'] / 10. # should be smaller than actual zfar if camera_param == self._camera_param: # NOTE: hacky way to make image from the main camera have fronter order logging.debug( 'Hacky way to make main camera image have fronter order') k = k / 1.1 self._depth[camera_param.name] = k # Add mesh to scene (fix node level translation and rotation) mesh = pyrender.Mesh([ pyrender.Primitive( positions=self._world_rays[name].T, indices=mesh_faces.T, color_0=np.ones((self._world_rays[name].shape[1], 4)), mode=pyrender.constants.GLTF.TRIANGLES, ) ]) self._bg_node[name] = pyrender.Node( name='bg_{}'.format(name), # project camera view to 3D mesh=mesh, translation=camera_param.get_position()[:, 0], rotation=camera_param.get_quaternion()[:, 0]) self._scene.add_node(self._bg_node[name])
def _get_homogeneous_image_coords(self, camera_param, get_mesh=False): cam_w = camera_param.get_width() cam_h = camera_param.get_height() xx, yy = np.meshgrid(np.arange(cam_w), np.arange(cam_h)) coords = np.stack( (xx.reshape(-1), yy.reshape(-1), np.ones_like(xx).reshape(-1)), axis=0) if not get_mesh: return coords else: upper = np.array([[0, 0], [0, 1], [1, 1]]) lower = np.array([[0, 0], [1, 1], [1, 0]]) mesh_tri = [] # FIXME TODO: vectorize this double for-loop logging.debug('Homogeneous coordinate computation not vectorized') for i in range(0, cam_h - 1): for j in range(0, cam_w - 1): c = np.array([i, j]) mesh_tri.append( np.ravel_multi_index((c + upper).T, (cam_h, cam_w))) mesh_tri.append( np.ravel_multi_index((c + lower).T, (cam_h, cam_w))) mesh_tri = np.stack(mesh_tri, axis=1) return coords, mesh_tri @property def bg_mesh_names(self) -> List[str]: """ Names of all background meshes in the scene. """ return self._bg_node.keys() @property def object_nodes(self) -> List[pyrender.Node]: """ Pyrender nodes of all virtual objects added to the scene. """ return self._object_nodes @property def config(self) -> Dict: """ Configuration of the view synthesizer. """ return self._config