Source code for vista.entities.sensors.camera_utils.ViewSynthesis

import os
from typing import Dict, Optional, Tuple, List
from enum import Enum
import numpy as np
import pyrender
import copy

from . import CameraParams
from ....utils import transform, logging, misc

ZNEAR = 0.01
ZFAR = 10000


class DepthModes(Enum):
    FIXED_PLANE = 1
    INPUT_DISP = 2
    MONODEPTH = 3


[docs]class ViewSynthesis:
    """ A RGB synthesizer that simulates RGB image at novel viewpoint around a pre-collected
    RGB image/video dataset. Conceptually, it (1) projects a reference 2D RGB image to 3D colored
    mesh using camera projection matrices and (approximated) depth, (2) place virtual objects
    in the scene, (3) render RGB image at novel viewpoint.

    Args:
        camera_param (CameraParams): Camera parameter object of the virtual camera.
        config (Dict): Configuration of the synthesizer.
        init_with_bg_mesh (bool): Whether to initialize with background mesh; default
            is set to ``True``.

    """
    DEFAULT_CONFIG = {
        'depth_mode': DepthModes.FIXED_PLANE,
        'znear': ZNEAR,
        'zfar': ZFAR,
        'use_lighting': False,
        'recoloring_factor': 0.5,
    }

    def __init__(self,
                 camera_param: CameraParams,
                 config: Dict,
                 init_with_bg_mesh: Optional[bool] = True) -> None:
        # Parse configuration
        self._camera_param = camera_param
        config = misc.merge_dict(config, self.DEFAULT_CONFIG)
        self._config = config
        if isinstance(self._config['depth_mode'], str):
            self._config['depth_mode'] = getattr(DepthModes,
                                                 self._config['depth_mode'])

        # Renderer and scene
        self._renderer = pyrender.OffscreenRenderer(
            self._camera_param.get_width(), self._camera_param.get_height())
        self._scene = pyrender.Scene(ambient_light=[1., 1., 1.],
                                     bg_color=[0, 0, 0])

        # Camera for rendering
        cam_w = camera_param.get_width()
        cam_h = camera_param.get_height()
        camera = pyrender.IntrinsicsCamera(
            fx=self._camera_param._fx,
            fy=self._camera_param._fy,
            cx=cam_w / 2.,  # NOTE: assume calibrated camera
            cy=cam_h / 2.,
            znear=self._config['znear'],
            zfar=self._config['zfar'])
        self._camera_node = pyrender.Node(
            name='camera',
            camera=camera,
            translation=self._camera_param.get_position()[:, 0],
            rotation=self._camera_param.get_quaternion()[:, 0])
        self._scene.add_node(self._camera_node)

        # Mesh of background. Can add more by calling add_bg_mesh for different camera_param
        self._world_rays: Dict[str, np.ndarray] = dict()
        if self._config['depth_mode'] == DepthModes.FIXED_PLANE:
            self._depth: Dict[str, np.ndarray] = dict()
        self._bg_node: Dict[str, pyrender.Node] = dict()
        if init_with_bg_mesh:
            self.add_bg_mesh(self._camera_param)

        # Scene for non-background objects and object nodes
        self._object_scene = pyrender.Scene(ambient_light=[1., 1., 1.],
                                            bg_color=[0, 0, 0])
        self._object_nodes = dict()

[docs]    def synthesize(
        self,
        trans: np.ndarray,
        rot: np.ndarray,
        imgs: Dict[str, np.ndarray],
        depth: Optional[Dict[str, np.ndarray]] = None
    ) -> Tuple[np.ndarray, np.ndarray]:
        """ Synthesize RGB image at the novel viewpoint specified by ``trans`` and
        ``rot`` with respect to the nominal viewpoint that corresponds to a set of
        RGB images and depth maps.

        Args:
            trans (np.ndarray): Translation vector.
            rot (np.ndarray): Rotation vector in Euler angle.
            imgs (Dict[str, np.ndarray]): A set of images (potentially from multiple camera).
            depth (Dict[str, np.ndarray]): A set of depth maps corresponding to ``imgs``.

        Returns:
            Returns a tuple (``array_1``, ``array_2``), where ``array_1`` is the
            synthesized RGB image and ``array_2`` is the corresponding depth image.

        """

        for name, img in imgs.items():
            # Need this otherwise will cause memory leak
            if os.environ.get('PYOPENGL_PLATFORM', None) != 'egl':
                self._bg_node[name].mesh = copy.deepcopy(
                    self._bg_node[name].mesh)

            # Refresh meshes in renderer; otherwise mesh vertex/color won't update
            mesh = self._bg_node[name].mesh
            for prim in mesh.primitives:
                prim._unbind()
                prim._remove_from_context()

            if mesh in self._renderer._renderer._meshes:
                self._renderer._renderer._meshes.remove(mesh)

            # Update mesh vertex positions (this won't affect node-level translation and rotation)
            if self._config['depth_mode'] == DepthModes.FIXED_PLANE:
                depth = self._depth[name]
            else:
                raise NotImplementedError

            depth = depth.reshape((1, -1))
            world_coords = np.multiply(-depth, self._world_rays[name])
            mesh.primitives[0].positions = world_coords.T

            # Update mesh face colors
            colors = img[:, ::-1] / 255.
            mesh.primitives[0].color_0[:, :3] = colors.reshape((-1, 3))

        # Update camera pose based on the requested viewpoint (with camera
        # relative pose to the car as offset)
        trans += self._camera_param.get_position()[:, 0]
        rot += transform.quat2euler(self._camera_param.get_quaternion()[:, 0])
        self._camera_node.matrix = transform.vec2mat(trans, rot)

        # Render background
        color_bg, _ = self._renderer.render(
            self._scene, flags=pyrender.constants.RenderFlags.FLAT)

        # Render scene object
        self._object_scene.clear()

        light = pyrender.DirectionalLight(
            [255, 255, 255], self.config['directional_light_intensity'])
        self._object_scene.add(light)

        self._object_scene.add_node(self._camera_node)

        for name, object_node in self._object_nodes.items():
            self._object_scene.add_node(object_node)

        color_objects, depth_objects = self._renderer.render(
            self._object_scene)
        color_objects = color_objects[:, :, ::
                                      -1]  # TODO: not sure why we need this

        # Overlay
        mask = (depth_objects != 0.).astype(np.uint8)[:, :, None]

        if mask.sum() != 0:  # recoloring
            color_bg_mean = color_bg.mean(0).mean(0)
            color_objects_mean = (color_objects *
                                  mask).sum(0).sum(0) / mask.sum()
            color_objects = color_objects + (
                color_bg_mean -
                color_objects_mean) * self.config['recoloring_factor']
            color_objects = np.clip(color_objects, 0, 255).astype(np.uint8)
        else:  # agent out-of-view
            pass

        color = (1 - mask) * color_bg + mask * color_objects

        return color, depth

[docs]    def update_object_node(self, name: str, mesh: pyrender.Mesh,
                           trans: np.ndarray, quat: np.ndarray) -> None:
        """ Update the virtual object in the scene.

        Args:
            name (str): Name of the virtual object.
            mesh (pyrender.Mesh): Mesh of the virtual object.
            trans (np.ndarray): Translation vector.
            quat (np.ndarray): Quaternion vector.

        """
        self._object_nodes[name] = pyrender.Node(name=name,
                                                 mesh=mesh,
                                                 translation=trans,
                                                 rotation=quat)

[docs]    def add_bg_mesh(self, camera_param: CameraParams) -> None:
        """ Add background mesh to the scene based on camera projection and the
        initial depth. The color of the mesh will be updated at every ``synthesize``
        call and if not using ground-plane depth approximation, the geometry of
        the mesh will be also updated.

        Args:
            camera_param (CameraParams): Camera parameter of the virtual camera.

        """
        # Projection and re-projection parameters
        K = camera_param.get_K().copy()
        logging.debug('Hacky way to handle unrectified image')
        cam_w = camera_param.get_width()
        cam_h = camera_param.get_height()
        K[0, 2] = cam_w / 2.
        K[1, 2] = cam_h / 2.
        K_inv = np.linalg.inv(K)

        # Mesh coordinates, faces, and rays
        name = camera_param.name
        homo_coords, mesh_faces = self._get_homogeneous_image_coords(
            camera_param, get_mesh=True)
        self._world_rays[name] = np.matmul(K_inv, homo_coords)

        # Get depth for ground plane assumption
        if self._config['depth_mode'] == DepthModes.FIXED_PLANE:
            normal = np.reshape(camera_param.get_ground_plane()[:3], [1, 3])
            d = camera_param.get_ground_plane()[3]
            k = np.divide(d, np.matmul(normal, self._world_rays[name]))
            k[k < 0] = self._config[
                'zfar'] / 10.  # should be smaller than actual zfar
            if camera_param == self._camera_param:
                # NOTE: hacky way to make image from the main camera have fronter order
                logging.debug(
                    'Hacky way to make main camera image have fronter order')
                k = k / 1.1
            self._depth[camera_param.name] = k

        # Add mesh to scene (fix node level translation and rotation)
        mesh = pyrender.Mesh([
            pyrender.Primitive(
                positions=self._world_rays[name].T,
                indices=mesh_faces.T,
                color_0=np.ones((self._world_rays[name].shape[1], 4)),
                mode=pyrender.constants.GLTF.TRIANGLES,
            )
        ])
        self._bg_node[name] = pyrender.Node(
            name='bg_{}'.format(name),  # project camera view to 3D
            mesh=mesh,
            translation=camera_param.get_position()[:, 0],
            rotation=camera_param.get_quaternion()[:, 0])
        self._scene.add_node(self._bg_node[name])

    def _get_homogeneous_image_coords(self, camera_param, get_mesh=False):
        cam_w = camera_param.get_width()
        cam_h = camera_param.get_height()

        xx, yy = np.meshgrid(np.arange(cam_w), np.arange(cam_h))
        coords = np.stack(
            (xx.reshape(-1), yy.reshape(-1), np.ones_like(xx).reshape(-1)),
            axis=0)

        if not get_mesh:
            return coords
        else:
            upper = np.array([[0, 0], [0, 1], [1, 1]])
            lower = np.array([[0, 0], [1, 1], [1, 0]])
            mesh_tri = []

            # FIXME TODO: vectorize this double for-loop
            logging.debug('Homogeneous coordinate computation not vectorized')
            for i in range(0, cam_h - 1):
                for j in range(0, cam_w - 1):
                    c = np.array([i, j])
                    mesh_tri.append(
                        np.ravel_multi_index((c + upper).T, (cam_h, cam_w)))
                    mesh_tri.append(
                        np.ravel_multi_index((c + lower).T, (cam_h, cam_w)))
            mesh_tri = np.stack(mesh_tri, axis=1)

            return coords, mesh_tri

    @property
    def bg_mesh_names(self) -> List[str]:
        """ Names of all background meshes in the scene. """
        return self._bg_node.keys()

    @property
    def object_nodes(self) -> List[pyrender.Node]:
        """ Pyrender nodes of all virtual objects added to the scene. """
        return self._object_nodes

    @property
    def config(self) -> Dict:
        """ Configuration of the view synthesizer. """
        return self._config