[unreal::imagecapture] improve simgetimages speed by changing pixel format to RGBA8 in PIPCamera.cpp, and only copying RGB in RenderRequest.cpp
microsoft · Apr 25, 2019 · d83c7c4 · d83c7c4
1 parent 9bb3434
commit d83c7c4
Show file tree

Hide file tree

Showing 14 changed files with 82 additions and 77 deletions.
diff --git a/PythonClient/car/drive_straight.py b/PythonClient/car/drive_straight.py
@@ -28,9 +28,9 @@
 def get_image():
     image = client.simGetImages([airsim.ImageRequest("0", airsim.ImageType.Scene, False, False)])[0]
     image1d = np.fromstring(image.image_data_uint8, dtype=np.uint8)
-    image_rgba = image1d.reshape(image.height, image.width, 4)
-    image_rgba = np.flipud(image_rgba)
-    return image_rgba[:, :, 0:3]
+    image_rgb = image1d.reshape(image.height, image.width, 3)
+    image_rgb = np.flipud(image_rgb)
+    return image_rgb
 
 while (True):
     car_state = client.getCarState()

diff --git a/PythonClient/car/hello_car.py b/PythonClient/car/hello_car.py
@@ -53,7 +53,7 @@
         airsim.ImageRequest("0", airsim.ImageType.DepthVis),  #depth visualization image
         airsim.ImageRequest("1", airsim.ImageType.DepthPerspective, True), #depth in perspective projection
         airsim.ImageRequest("1", airsim.ImageType.Scene), #scene vision image in png format
-        airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)])  #scene vision image in uncompressed RGBA array
+        airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)])  #scene vision image in uncompressed RGB array
     print('Retrieved images: %d', len(responses))
 
     for response in responses:
@@ -69,10 +69,9 @@
         else: #uncompressed array
             print("Type %d, size %d" % (response.image_type, len(response.image_data_uint8)))
             img1d = np.fromstring(response.image_data_uint8, dtype=np.uint8) #get numpy array
-            img_rgba = img1d.reshape(response.height, response.width, 4) #reshape array to 4 channel image array H X W X 4
-            img_rgba = np.flipud(img_rgba) #original image is flipped vertically
-            img_rgba[:,:,1:2] = 100 #just for fun add little bit of green in all pixels
-            airsim.write_png(os.path.normpath(filename + '.greener.png'), img_rgba) #write to png 
+            img_rgb = img1d.reshape(response.height, response.width, 3) #reshape array to 3 channel image array H X W X 3
+            img_rgb = np.flipud(img_rgb) #original image is flipped vertically
+            airsim.write_png(os.path.normpath(filename + '.png'), img_rgb) #write to png 
 
 
 #restore to original state

diff --git a/PythonClient/car/legacy_hello_car.py b/PythonClient/car/legacy_hello_car.py
@@ -40,7 +40,7 @@
     ImageRequest(0, airsim.AirSimImageType.DepthVis),  #depth visualiztion image
     ImageRequest(1, airsim.AirSimImageType.DepthPerspective, True), #depth in perspective projection
     ImageRequest(1, airsim.AirSimImageType.Scene), #scene vision image in png format
-    ImageRequest(1, airsim.AirSimImageType.Scene, False, False)])  #scene vision image in uncompressed RGBA array
+    ImageRequest(1, airsim.AirSimImageType.Scene, False, False)])  #scene vision image in uncompressed RGB array
 print('Retrieved images: %d' % len(responses))
 
 tmp_dir = os.path.join(tempfile.gettempdir(), "airsim_drone")
@@ -64,10 +64,9 @@
     else: #uncompressed array
         print("Type %d, size %d" % (response.image_type, len(response.image_data_uint8)))
         img1d = np.fromstring(response.image_data_uint8, dtype=np.uint8) #get numpy array
-        img_rgba = img1d.reshape(response.height, response.width, 4) #reshape array to 4 channel image array H X W X 4
-        img_rgba = np.flipud(img_rgba) #original image is fliped vertically
-        img_rgba[:,:,1:2] = 100 #just for fun add little bit of green in all pixels
-        AirSimClientBase.write_png(os.path.normpath(filename + '.greener.png'), img_rgba) #write to png
+        img_rgb = img1d.reshape(response.height, response.width, 3) #reshape array to 3 channel image array H X W X 3
+        img_rgb = np.flipud(img_rgb) #original image is fliped vertically
+        AirSimClientBase.write_png(os.path.normpath(filename + '.png'), img_rgb) #write to png
 
 AirSimClientBase.wait_key('Press any key to reset to original state')
 

diff --git a/PythonClient/car/multi_agent_car.py b/PythonClient/car/multi_agent_car.py
@@ -91,11 +91,11 @@
     # get camera images from the car
     responses1 = client.simGetImages([
         airsim.ImageRequest("0", airsim.ImageType.DepthVis),  #depth visualization image
-        airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)], "Car1")  #scene vision image in uncompressed RGBA array
+        airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)], "Car1")  #scene vision image in uncompressed RGB array
     print('Car1: Retrieved images: %d' % (len(responses1)))
     responses2 = client.simGetImages([
         airsim.ImageRequest("0", airsim.ImageType.Segmentation),  #depth visualization image
-        airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)], "Car2")  #scene vision image in uncompressed RGBA array
+        airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)], "Car2")  #scene vision image in uncompressed RGB array
     print('Car2: Retrieved images: %d' % (len(responses2)))
 
     for response in responses1 + responses2:
@@ -110,10 +110,9 @@
         else: #uncompressed array
             print("Type %d, size %d" % (response.image_type, len(response.image_data_uint8)))
             img1d = np.fromstring(response.image_data_uint8, dtype=np.uint8) #get numpy array
-            img_rgba = img1d.reshape(response.height, response.width, 4) #reshape array to 4 channel image array H X W X 4
-            img_rgba = np.flipud(img_rgba) #original image is flipped vertically
-            img_rgba[:,:,1:2] = 100 #just for fun add little bit of green in all pixels
-            airsim.write_png(os.path.normpath(filename + '.greener.png'), img_rgba) #write to png 
+            img_rgb = img1d.reshape(response.height, response.width, 3) #reshape array to 3 channel image array H X W X 3
+            img_rgb = np.flipud(img_rgb) #original image is flipped vertically
+            airsim.write_png(os.path.normpath(filename + '.png'), img_rgb) #write to png 
 
 
 #restore to original state

diff --git a/PythonClient/computer_vision/segmentation.py b/PythonClient/computer_vision/segmentation.py
@@ -54,20 +54,11 @@
     else: #uncompressed array - numpy demo
         print("Type %d, size %d" % (response.image_type, len(response.image_data_uint8)))
         img1d = np.fromstring(response.image_data_uint8, dtype=np.uint8) #get numpy array
-        img_rgba = img1d.reshape(response.height, response.width, 4) #reshape array to 4 channel image array H X W X 4
-        img_rgba = np.flipud(img_rgba) #original image is flipped vertically
-        #airsim.write_png(os.path.normpath(filename + '.numpy.png'), img_rgba) #write to png 
+        img_rgb = img1d.reshape(response.height, response.width, 3) #reshape array to 3 channel image array H X W X 3
+        img_rgb = np.flipud(img_rgb) #original image is flipped vertically
+        #airsim.write_png(os.path.normpath(filename + '.numpy.png'), img_rgb) #write to png 
 
         #find unique colors
-        print(np.unique(img_rgba[:,:,0], return_counts=True)) #red
-        print(np.unique(img_rgba[:,:,1], return_counts=True)) #green
-        print(np.unique(img_rgba[:,:,2], return_counts=True)) #blue  
-        print(np.unique(img_rgba[:,:,3], return_counts=True)) #blue
-
-
-
-
-
-
-
-
+        print(np.unique(img_rgb[:,:,0], return_counts=True)) #red
+        print(np.unique(img_rgb[:,:,1], return_counts=True)) #green
+        print(np.unique(img_rgb[:,:,2], return_counts=True)) #blue  
diff --git a/PythonClient/imitation_learning/drive_model.py b/PythonClient/imitation_learning/drive_model.py
@@ -39,8 +39,8 @@ def get_image():
     """
     image_response = client.simGetImages([airsim.ImageRequest("0", airsim.ImageType.Scene, False, False)])[0]
     image1d = np.fromstring(image_response.image_data_uint8, dtype=np.uint8)
-    image_rgba = image1d.reshape(image_response.height, image_response.width, 4)
-    return image_rgba[78:144,27:227,0:3].astype(float)
+    image_rgb = image1d.reshape(image_response.height, image_response.width, 3)
+    return image_rgb[78:144,27:227,0:3].astype(float)  # 0:3 keeps R, G and B; 0:2 would drop the blue channel
 
 while True:    
     # Update throttle value according to steering angle

diff --git a/PythonClient/multirotor/kinect_publisher.py b/PythonClient/multirotor/kinect_publisher.py
@@ -40,8 +40,8 @@ def getDepthImage(self,response_d):
 
     def getRGBImage(self,response_rgb):
         img1d = np.fromstring(response_rgb.image_data_uint8, dtype=np.uint8)
-        img_rgba = img1d.reshape(response_rgb.height, response_rgb.width, 4)
-        img_rgb = img_rgba[..., :3][..., ::-1]
+        img_rgb = img1d.reshape(response_rgb.height, response_rgb.width, 3)
+        img_rgb = img_rgb[..., :3][..., ::-1]
         return img_rgb
 
     def enhanceRGB(self,img_rgb):

diff --git a/PythonClient/multirotor/multi_agent_drone.py b/PythonClient/multirotor/multi_agent_drone.py
@@ -59,11 +59,11 @@
 # get camera images from the car
 responses1 = client.simGetImages([
     airsim.ImageRequest("0", airsim.ImageType.DepthVis),  #depth visualization image
-    airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)], vehicle_name="Drone1")  #scene vision image in uncompressed RGBA array
+    airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)], vehicle_name="Drone1")  #scene vision image in uncompressed RGB array
 print('Drone1: Retrieved images: %d' % len(responses1))
 responses2 = client.simGetImages([
     airsim.ImageRequest("0", airsim.ImageType.DepthVis),  #depth visualization image
-    airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)], vehicle_name="Drone2")  #scene vision image in uncompressed RGBA array
+    airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)], vehicle_name="Drone2")  #scene vision image in uncompressed RGB array
 print('Drone2: Retrieved images: %d' % len(responses2))
 
 tmp_dir = os.path.join(tempfile.gettempdir(), "airsim_drone")
@@ -87,10 +87,9 @@
     else: #uncompressed array
         print("Type %d, size %d" % (response.image_type, len(response.image_data_uint8)))
         img1d = np.fromstring(response.image_data_uint8, dtype=np.uint8) #get numpy array
-        img_rgba = img1d.reshape(response.height, response.width, 4) #reshape array to 4 channel image array H X W X 4
-        img_rgba = np.flipud(img_rgba) #original image is flipped vertically
-        img_rgba[:,:,1:2] = 100 #just for fun add little bit of green in all pixels
-        airsim.write_png(os.path.normpath(filename + '.greener.png'), img_rgba) #write to png
+        img_rgb = img1d.reshape(response.height, response.width, 3) #reshape array to 3 channel image array H X W X 3
+        img_rgb = np.flipud(img_rgb) #original image is flipped vertically
+        airsim.write_png(os.path.normpath(filename + '.png'), img_rgb) #write to png
 
 airsim.wait_key('Press any key to reset to original state')
 

diff --git a/PythonClient/ros/car_image_raw.py b/PythonClient/ros/car_image_raw.py
@@ -22,21 +22,21 @@ def airpub():
     while not rospy.is_shutdown():
          # get camera images from the car
         responses = client.simGetImages([
-            airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)])  #scene vision image in uncompressed RGBA array
+            airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)])  #scene vision image in uncompressed RGB array
 
         for response in responses:
-            img_rgba_string = response.image_data_uint8
+            img_rgb_string = response.image_data_uint8
 
         # Populate image message
         msg=Image() 
         msg.header.stamp = rospy.Time.now()
         msg.header.frame_id = "frameId"
-        msg.encoding = "rgba8"
+        msg.encoding = "rgb8"
         msg.height = 360  # resolution should match values in settings.json 
         msg.width = 640
-        msg.data = img_rgba_string
+        msg.data = img_rgb_string
         msg.is_bigendian = 0
-        msg.step = msg.width * 4
+        msg.step = msg.width * 3
 
         # log time and size of published image
         rospy.loginfo(len(response.image_data_uint8))

diff --git a/PythonClient/ros/drone_image_raw.py b/PythonClient/ros/drone_image_raw.py
@@ -23,21 +23,21 @@ def airpub():
     while not rospy.is_shutdown():
          # get camera images from the car
         responses = client.simGetImages([
-            airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)])  #scene vision image in uncompressed RGBA array
+            airsim.ImageRequest("1", airsim.ImageType.Scene, False, False)])  #scene vision image in uncompressed RGB array
 
         for response in responses:
-            img_rgba_string = response.image_data_uint8
+            img_rgb_string = response.image_data_uint8
 
         # Populate image message
         msg=Image() 
         msg.header.stamp = rospy.Time.now()
         msg.header.frame_id = "frameId"
-        msg.encoding = "rgba8"
+        msg.encoding = "rgb8"
         msg.height = 360  # resolution should match values in settings.json 
         msg.width = 640
-        msg.data = img_rgba_string
+        msg.data = img_rgb_string
         msg.is_bigendian = 0
-        msg.step = msg.width * 4
+        msg.step = msg.width * 3
 
         # log time and size of published image
         rospy.loginfo(len(response.image_data_uint8))

diff --git a/Unreal/Plugins/AirSim/Source/PIPCamera.cpp b/Unreal/Plugins/AirSim/Source/PIPCamera.cpp
@@ -24,6 +24,15 @@ APIPCamera::APIPCamera()
             "", LogDebugLevel::Failure);
 
     PrimaryActorTick.bCanEverTick = true;
+
+    image_type_to_pixel_format_map_.Add(0, EPixelFormat::PF_B8G8R8A8);
+    image_type_to_pixel_format_map_.Add(1, EPixelFormat::PF_DepthStencil); // not used. init_auto_format is called in setupCameraFromSettings() 
+    image_type_to_pixel_format_map_.Add(2, EPixelFormat::PF_DepthStencil); // not used for same reason as above
+    image_type_to_pixel_format_map_.Add(3, EPixelFormat::PF_DepthStencil); // not used for same reason as above 
+    image_type_to_pixel_format_map_.Add(4, EPixelFormat::PF_DepthStencil); // not used for same reason as above 
+    image_type_to_pixel_format_map_.Add(5, EPixelFormat::PF_B8G8R8A8);
+    image_type_to_pixel_format_map_.Add(6, EPixelFormat::PF_B8G8R8A8);
+    image_type_to_pixel_format_map_.Add(7, EPixelFormat::PF_B8G8R8A8);
 }
 
 void APIPCamera::PostInitializeComponents()
@@ -260,8 +269,12 @@ void APIPCamera::setupCameraFromSettings(const APIPCamera::CameraSetting& camera
         const auto& noise_setting = camera_setting.noise_settings.at(image_type);
 
         if (image_type >= 0) { //scene capture components
-            updateCaptureComponentSetting(captures_[image_type], render_targets_[image_type],
-                capture_setting, ned_transform);
+            if (image_type==0 || image_type==5 || image_type==6 || image_type==7)
+                updateCaptureComponentSetting(captures_[image_type], render_targets_[image_type], false, 
+                    image_type_to_pixel_format_map_[image_type], capture_setting, ned_transform);
+            else
+                updateCaptureComponentSetting(captures_[image_type], render_targets_[image_type], true, 
+                    image_type_to_pixel_format_map_[image_type], capture_setting, ned_transform); 
 
             setNoiseMaterial(image_type, captures_[image_type], captures_[image_type]->PostProcessSettings, noise_setting);
         }
@@ -274,9 +287,17 @@ void APIPCamera::setupCameraFromSettings(const APIPCamera::CameraSetting& camera
 }
 
 void APIPCamera::updateCaptureComponentSetting(USceneCaptureComponent2D* capture, UTextureRenderTarget2D* render_target, 
-    const CaptureSetting& setting, const NedTransform& ned_transform)
+    bool auto_format, const EPixelFormat& pixel_format, const CaptureSetting& setting, const NedTransform& ned_transform)
 {
-    render_target->InitAutoFormat(setting.width, setting.height); //256 X 144, X 480
+    if (auto_format)
+    {
+        render_target->InitAutoFormat(setting.width, setting.height); //256 X 144, X 480
+    }
+    else
+    {
+        render_target->InitCustomFormat(setting.width, setting.height, pixel_format, false);
+    } 
+
     if (!std::isnan(setting.target_gamma))
         render_target->TargetGamma = setting.target_gamma;
 

diff --git a/Unreal/Plugins/AirSim/Source/PIPCamera.h b/Unreal/Plugins/AirSim/Source/PIPCamera.h
@@ -4,7 +4,7 @@
 #include "Components/SceneCaptureComponent2D.h"
 #include "Camera/CameraActor.h"
 #include "Materials/Material.h"
-
+#include "Runtime/Core/Public/PixelFormat.h"
 #include "common/ImageCaptureBase.hpp"
 #include "common/common_utils/Utils.hpp"
 #include "common/AirSimSettings.hpp"
@@ -65,6 +65,7 @@ class AIRSIM_API APIPCamera : public ACameraActor
     FRotator gimbald_rotator_;
     float gimbal_stabilization_;
     const NedTransform* ned_transform_;
+    TMap<int, EPixelFormat> image_type_to_pixel_format_map_;
 
 private: //methods
     typedef common_utils::Utils Utils;
@@ -73,8 +74,8 @@ class AIRSIM_API APIPCamera : public ACameraActor
 
     static unsigned int imageTypeCount();
     void enableCaptureComponent(const ImageType type, bool is_enabled);
-    static void updateCaptureComponentSetting(USceneCaptureComponent2D* capture, UTextureRenderTarget2D* render_target, const CaptureSetting& setting, 
-        const NedTransform& ned_transform);
+    static void updateCaptureComponentSetting(USceneCaptureComponent2D* capture, UTextureRenderTarget2D* render_target, 
+        bool auto_format, const EPixelFormat& pixel_format, const CaptureSetting& setting, const NedTransform& ned_transform);
     void setNoiseMaterial(int image_type, UObject* outer, FPostProcessSettings& obj, const NoiseSetting& settings);
     static void updateCameraPostProcessingSetting(FPostProcessSettings& obj, const CaptureSetting& setting);
     static void updateCameraSetting(UCameraComponent* camera, const CaptureSetting& setting, const NedTransform& ned_transform);

diff --git a/Unreal/Plugins/AirSim/Source/RenderRequest.cpp b/Unreal/Plugins/AirSim/Source/RenderRequest.cpp
@@ -106,7 +106,7 @@ void RenderRequest::getScreenshot(std::shared_ptr<RenderParams> params[], std::v
     for (unsigned int i = 0; i < req_size; ++i) {
         if (!params[i]->pixels_as_float) {
             if (results[i]->width != 0 && results[i]->height != 0) {
-                results[i]->image_data_uint8.SetNumUninitialized(results[i]->width * results[i]->height * 4, false);
+                results[i]->image_data_uint8.SetNumUninitialized(results[i]->width * results[i]->height * 3, false);
                 if (params[i]->compress)
                     UAirBlueprintLib::CompressImageArray(results[i]->width, results[i]->height, results[i]->bmp, results[i]->image_data_uint8);
                 else {
@@ -115,7 +115,6 @@ void RenderRequest::getScreenshot(std::shared_ptr<RenderParams> params[], std::v
                         *ptr++ = item.R;
                         *ptr++ = item.G;
                         *ptr++ = item.B;
-                        *ptr++ = item.A;
                     }
                 }
             }