add #1882 (Closed)

2 changes: 1 addition & 1 deletion av/video/frame.pyi
@@ -79,7 +79,7 @@ class VideoFrame(Frame):
data: bytes,
width: int,
height: int,
format: str = "rgba",
format: str = "rgb24",
flip_horizontal: bool = False,
flip_vertical: bool = False,
) -> VideoFrame: ...
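
The stub above changes the default format of VideoFrame.from_bytes from "rgba" to "rgb24". A minimal sketch of calling it with the new default, assuming PyAV with this PR applied; the 2x2 pixel values are illustrative only:

import av

width, height = 2, 2
rgb24_bytes = bytes(
    [255, 0, 0,   0, 255, 0,     # row 0: red, green
     0, 0, 255,   255, 255, 0]   # row 1: blue, yellow
)
frame = av.VideoFrame.from_bytes(rgb24_bytes, width, height)  # format defaults to "rgb24"
print(frame.format.name, frame.width, frame.height)  # rgb24 2 2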
178 changes: 134 additions & 44 deletions av/video/frame.pyx
@@ -17,11 +17,11 @@ supported_np_pix_fmts = {
"abgr", "argb", "bayer_bggr16be", "bayer_bggr16le", "bayer_bggr8", "bayer_gbrg16be",
"bayer_gbrg16le", "bayer_gbrg8", "bayer_grbg16be", "bayer_grbg16le", "bayer_grbg8",
"bayer_rggb16be", "bayer_rggb16le", "bayer_rggb8", "bgr24", "bgr48be", "bgr48le", "bgr8", "bgra", "bgra64be", "bgra64le",
"gbrapf32be", "gbrapf32le", "gbrp", "gbrp10be", "gbrp10le", "gbrp12be", "gbrp12le",
"gbrp14be", "gbrp14le", "gbrp16be", "gbrp16le", "gbrpf32be", "gbrpf32le", "gray",
"gray16be", "gray16le", "gray8", "grayf32be", "grayf32le", "nv12", "pal8", "rgb24",
"rgb48be", "rgb48le", "rgb8", "rgba", "rgba64be", "rgba64le", "yuv420p",
"yuv420p10le", "yuv422p10le", "yuv444p", "yuv444p16be", "yuv444p16le", "yuva444p16be",
"gbrap10be", "gbrap10le", "gbrap12be", "gbrap12le","gbrap14be", "gbrap14le", "gbrap16be", "gbrap16le", "gbrapf32be", "gbrapf32le",
"gbrp", "gbrp9be", "gbrp9le", "gbrp10be", "gbrp10le", "gbrp12be", "gbrp12le", "gbrp14be", "gbrp14le", "gbrp16be", "gbrp16le", "gbrpf32be", "gbrpf32le",
"gray", "gray8", "gray9be", "gray9le", "gray10be", "gray10le", "gray12be", "gray12le", "gray14be", "gray14le", "gray16be", "gray16le", "grayf32be", "grayf32le",
"nv12", "pal8", "rgb24", "rgb48be", "rgb48le", "rgb8", "rgba", "rgba64be", "rgba64le",
"yuv420p", "yuv420p10le", "yuv422p10le", "yuv444p", "yuv444p16be", "yuv444p16le", "yuva444p16be",
"yuva444p16le", "yuvj420p", "yuvj444p", "yuyv422",
}
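
The set above gains additional planar GBR(A) and gray bit depths. A minimal round-trip sketch for one listed format, assuming PyAV with this PR applied; it follows the same pattern as the existing tests:

import av
import numpy as np

assert "gbrp10le" in av.video.frame.supported_np_pix_fmts

# 10-bit planar GBR is fed as an RGB-ordered (height, width, 3) uint16 array.
array = np.random.randint(0, 1024, size=(480, 640, 3), dtype=np.uint16)
frame = av.VideoFrame.from_ndarray(array, format="gbrp10le")
assert (frame.to_ndarray() == array).all()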

@@ -346,9 +346,21 @@ cdef class VideoFrame(Frame):
"bgr48le": (6, "uint16"),
"bgr8": (1, "uint8"),
"bgra": (4, "uint8"),
"bgra64be": (8, "uint16"),
"bgra64le": (8, "uint16"),
"gbrap10be": (2, "uint16"),
"gbrap10le": (2, "uint16"),
"gbrap12be": (2, "uint16"),
"gbrap12le": (2, "uint16"),
"gbrap14be": (2, "uint16"),
"gbrap14le": (2, "uint16"),
"gbrap16be": (2, "uint16"),
"gbrap16le": (2, "uint16"),
"gbrapf32be": (4, "float32"),
"gbrapf32le": (4, "float32"),
"gbrp": (1, "uint8"),
"gbrp9be": (2, "uint16"),
"gbrp9le": (2, "uint16"),
"gbrp10be": (2, "uint16"),
"gbrp10le": (2, "uint16"),
"gbrp12be": (2, "uint16"),
@@ -360,9 +372,17 @@ cdef class VideoFrame(Frame):
"gbrpf32be": (4, "float32"),
"gbrpf32le": (4, "float32"),
"gray": (1, "uint8"),
"gray8": (1, "uint8"),
"gray9be": (2, "uint16"),
"gray9le": (2, "uint16"),
"gray10be": (2, "uint16"),
"gray10le": (2, "uint16"),
"gray12be": (2, "uint16"),
"gray12le": (2, "uint16"),
"gray14be": (2, "uint16"),
"gray14le": (2, "uint16"),
"gray16be": (2, "uint16"),
"gray16le": (2, "uint16"),
"gray8": (1, "uint8"),
"grayf32be": (4, "float32"),
"grayf32le": (4, "float32"),
"rgb24": (3, "uint8"),
@@ -372,8 +392,6 @@ cdef class VideoFrame(Frame):
"rgba": (4, "uint8"),
"rgba64be": (8, "uint16"),
"rgba64le": (8, "uint16"),
"bgra64be": (8, "uint16"),
"bgra64le": (8, "uint16"),
"yuv444p": (1, "uint8"),
"yuv444p16be": (2, "uint16"),
"yuv444p16le": (2, "uint16"),
@@ -461,12 +479,18 @@ cdef class VideoFrame(Frame):
return frame

@staticmethod
def from_numpy_buffer(array, format="rgb24", width=0):
# Usually the width of the array is the same as the width of the image. But sometimes
# this is not possible, for example with yuv420p images that have padding. These are
# awkward because the UV rows at the bottom have padding bytes in the middle of the
# row as well as at the end. To cope with these, callers need to be able to pass the
# actual width to us.
def from_numpy_buffer(array, format="rgb24", width:int=0):
"""
Construct a frame from a numpy buffer.

:param int width: optional width of actual image, if different from the array width.

.. note:: For formats where width of the array is not the same as the width of the image,
for example with yuv420p images the UV rows at the bottom have padding bytes in the middle of the
row as well as at the end. To cope with these, callers need to be able to pass the actual width.
"""
import numpy as np

height = array.shape[0]
if not width:
width = array.shape[1]
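
from_numpy_buffer wraps the caller's buffer rather than copying it, which is why the stride checks below are strict. A minimal usage sketch, assuming the zero-copy behaviour implied by the _image_fill_pointers_numpy call at the end of this function; the packed "rgb24" path is used here (planar "gbrp*" input is repacked first, so it does not share memory with the caller's array):

import av
import numpy as np

array = np.zeros((480, 640, 3), dtype=np.uint8)  # (height, width, channels), C-contiguous
frame = av.VideoFrame.from_numpy_buffer(array, format="rgb24")

# The frame's planes point into `array`, so later writes to the array
# are visible through the frame.
array[:] = 255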
@@ -495,12 +519,43 @@ cdef class VideoFrame(Frame):
if array.strides[1:] != (8, 2):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0], )
elif format in {"gbrp"}:
check_ndarray(array, "uint8", 3)
check_ndarray_shape(array, array.shape[2] == 3)
if array.strides[1:] != (3, 1):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0] // 3, array.strides[0] // 3, array.strides[0] // 3, )
elif format in {"gbrp9be", "gbrp9le", "gbrp10be", "gbrp10le", "gbrp12be", "gbrp12le", "gbrp14be", "gbrp14le", "gbrp16be", "gbrp16le"}:
check_ndarray(array, "uint16", 3)
check_ndarray_shape(array, array.shape[2] == 3)
if array.strides[1:] != (6, 2):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0] // 3, array.strides[0] // 3, array.strides[0] // 3, )
elif format in {"gbrpf32be", "gbrpf32le"}:
check_ndarray(array, "float32", 3)
check_ndarray_shape(array, array.shape[2] == 3)
if array.strides[1:] != (12, 4):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0] // 3, array.strides[0] // 3, array.strides[0] // 3, )
elif format in {"gbrap10be", "gbrap10le", "gbrap12be", "gbrap12le", "gbrap14be", "gbrap14le", "gbrap16be", "gbrap16le"}:
check_ndarray(array, "uint16", 3)
check_ndarray_shape(array, array.shape[2] == 4)
if array.strides[1:] != (8, 2):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, )
elif format in {"gbrapf32be", "gbrapf32le"}:
check_ndarray(array, "float32", 3)
check_ndarray_shape(array, array.shape[2] == 4)
if array.strides[1:] != (16, 4):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, )
elif format in {"gray", "gray8", "rgb8", "bgr8","bayer_bggr8", "bayer_rggb8", "bayer_gbrg8", "bayer_grbg8"}:
check_ndarray(array, "uint8", 2)
if array.strides[1] != 1:
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0], )
elif format in {"gray16le", "gray16be", "bayer_rggb16le", "bayer_gbrg16le", "bayer_grbg16le","bayer_bggr16be", "bayer_rggb16be", "bayer_gbrg16be", "bayer_grbg16be"}:
elif format in {"gray9be", "gray9le", "gray10be", "gray10le", "gray12be", "gray12le", "gray14be", "gray14le", "gray16be", "gray16le",
"bayer_bggr16be", "bayer_bggr16le", "bayer_gbrg16be", "bayer_gbrg16le", "bayer_grbg16be", "bayer_grbg16le", "bayer_rggb16be", "bayer_rggb16le"}:
check_ndarray(array, "uint16", 2)
if array.strides[1] != 2:
raise ValueError("provided array does not have C_CONTIGUOUS rows")
@@ -525,7 +580,11 @@ cdef class VideoFrame(Frame):
linesizes = (array.strides[0], array.strides[0])
else:
raise ValueError(f"Conversion from numpy array with format `{format}` is not yet supported")


if format.startswith("gbrap"): # rgba -> gbra
array = np.ascontiguousarray(np.moveaxis(array[..., [1, 2, 0, 3]], -1, 0))
elif format.startswith("gbrp"): # rgb -> gbr
array = np.ascontiguousarray(np.moveaxis(array[..., [1, 2, 0]], -1, 0))
frame = alloc_video_frame()
frame._image_fill_pointers_numpy(array, width, height, linesizes, format)
return frame
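
The reordering above means callers keep passing RGB(A)-ordered interleaved arrays even though FFmpeg stores these formats as G, B, R(, A) planes. A small numpy-only sketch of the same remap, with illustrative shapes:

import numpy as np

rgb = np.random.randint(0, 256, size=(4, 6, 3), dtype=np.uint8)  # (height, width, 3), RGB order
gbr_planar = np.ascontiguousarray(np.moveaxis(rgb[..., [1, 2, 0]], -1, 0))
print(gbr_planar.shape)  # (3, 4, 6): one contiguous plane each for G, B, R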
@@ -600,45 +659,63 @@ cdef class VideoFrame(Frame):

# case layers are concatenated
channels, itemsize, dtype = {
"yuv444p": (3, 1, "uint8"),
"yuvj444p": (3, 1, "uint8"),
"bayer_bggr8": (1, 1, "uint8"),
"bayer_rggb8": (1, 1, "uint8"),
"bayer_grbg8": (1, 1, "uint8"),
"bayer_gbrg8": (1, 1, "uint8"),
"bayer_bggr16be": (1, 2, "uint16"),
"bayer_bggr16le": (1, 2, "uint16"),
"bayer_rggb16be": (1, 2, "uint16"),
"bayer_rggb16le": (1, 2, "uint16"),
"bayer_grbg16be": (1, 2, "uint16"),
"bayer_grbg16le": (1, 2, "uint16"),
"bayer_gbrg16be": (1, 2, "uint16"),
"bayer_gbrg16le": (1, 2, "uint16"),
"bgr8": (1, 1, "uint8"),
"gbrap10be": (4, 2, "uint16"),
"gbrap10le": (4, 2, "uint16"),
"gbrap12be": (4, 2, "uint16"),
"gbrap12le": (4, 2, "uint16"),
"gbrap14be": (4, 2, "uint16"),
"gbrap14le": (4, 2, "uint16"),
"gbrap16be": (4, 2, "uint16"),
"gbrap16le": (4, 2, "uint16"),
"gbrapf32be": (4, 4, "float32"),
"gbrapf32le": (4, 4, "float32"),
"gbrp": (3, 1, "uint8"),
"gbrp9be": (3, 2, "uint16"),
"gbrp9le": (3, 2, "uint16"),
"gbrp10be": (3, 2, "uint16"),
"gbrp12be": (3, 2, "uint16"),
"gbrp14be": (3, 2, "uint16"),
"gbrp16be": (3, 2, "uint16"),
"gbrp10le": (3, 2, "uint16"),
"gbrp12be": (3, 2, "uint16"),
"gbrp12le": (3, 2, "uint16"),
"gbrp14be": (3, 2, "uint16"),
"gbrp14le": (3, 2, "uint16"),
"gbrp16be": (3, 2, "uint16"),
"gbrp16le": (3, 2, "uint16"),
"gbrpf32be": (3, 4, "float32"),
"gbrpf32le": (3, 4, "float32"),
"gray": (1, 1, "uint8"),
"gray8": (1, 1, "uint8"),
"rgb8": (1, 1, "uint8"),
"bgr8": (1, 1, "uint8"),
"gray9be": (1, 2, "uint16"),
"gray9le": (1, 2, "uint16"),
"gray10be": (1, 2, "uint16"),
"gray10le": (1, 2, "uint16"),
"gray12be": (1, 2, "uint16"),
"gray12le": (1, 2, "uint16"),
"gray14be": (1, 2, "uint16"),
"gray14le": (1, 2, "uint16"),
"gray16be": (1, 2, "uint16"),
"gray16le": (1, 2, "uint16"),
"grayf32be": (1, 4, "float32"),
"grayf32le": (1, 4, "float32"),
"gbrapf32be": (4, 4, "float32"),
"gbrapf32le": (4, 4, "float32"),
"rgb8": (1, 1, "uint8"),
"yuv444p": (3, 1, "uint8"),
"yuvj444p": (3, 1, "uint8"),
"yuv444p16be": (3, 2, "uint16"),
"yuv444p16le": (3, 2, "uint16"),
"yuva444p16be": (4, 2, "uint16"),
"yuva444p16le": (4, 2, "uint16"),
"bayer_bggr8": (1, 1, "uint8"),
"bayer_rggb8": (1, 1, "uint8"),
"bayer_grbg8": (1, 1, "uint8"),
"bayer_gbrg8": (1, 1, "uint8"),
"bayer_bggr16be": (1, 2, "uint16"),
"bayer_bggr16le": (1, 2, "uint16"),
"bayer_rggb16be": (1, 2, "uint16"),
"bayer_rggb16le": (1, 2, "uint16"),
"bayer_grbg16be": (1, 2, "uint16"),
"bayer_grbg16le": (1, 2, "uint16"),
"bayer_gbrg16be": (1, 2, "uint16"),
"bayer_gbrg16le": (1, 2, "uint16"),
"yuva444p16le": (4, 2, "uint16"),
}.get(format, (None, None, None))
if channels is not None:
if array.ndim == 2: # (height, width) -> (height, width, 1)
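
The mapping above drives from_ndarray for formats whose layers are simply concatenated; a 2-D array is promoted to a single channel. A minimal sketch for one of the newly added high-bit-depth gray formats, mirroring the updated tests further down:

import av
import numpy as np

array = np.random.randint(0, 2**10, size=(480, 640), dtype=np.uint16)
frame = av.VideoFrame.from_ndarray(array, format="gray10le")
assert frame.format.name == "gray10le"
assert (frame.to_ndarray() == array).all()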
@@ -768,12 +845,25 @@ cdef class VideoFrame(Frame):
return frame

@staticmethod
def from_bytes(img_bytes: bytes, width: int, height: int, format="rgba", flip_horizontal=False, flip_vertical=False):
def from_bytes(img_bytes: bytes, width: int, height: int, format="rgb24", flip_horizontal=False, flip_vertical=False):
"""
Construct a frame from raw bytes.

:param img_bytes: Raw image data.
:param width: Frame width.
:param height: Frame height.
:param format: Pixel format, e.g. "rgb24".
:param flip_horizontal: If True, flip image horizontally.
:param flip_vertical: If True, flip image vertically.
"""
frame = VideoFrame(width, height, format)
if format == "rgba":
copy_bytes_to_plane(img_bytes, frame.planes[0], 4, flip_horizontal, flip_vertical)
elif format in ("bayer_bggr8", "bayer_rggb8", "bayer_gbrg8", "bayer_grbg8","bayer_bggr16le", "bayer_rggb16le", "bayer_gbrg16le", "bayer_grbg16le","bayer_bggr16be", "bayer_rggb16be", "bayer_gbrg16be", "bayer_grbg16be"):
copy_bytes_to_plane(img_bytes, frame.planes[0], 1 if format.endswith("8") else 2, flip_horizontal, flip_vertical)
if frame.format.is_planar:
raise NotImplementedError(f"Conversion from bytes with format `{format}` is not yet supported")
else:
raise NotImplementedError(f"Format '{format}' is not supported.")
bytes_per_pixel = frame.format.padded_bits_per_pixel // 8
expected_size = width * height * bytes_per_pixel
if len(img_bytes) != expected_size:
raise ValueError(f"Expected {expected_size} bytes, got {len(img_bytes)}")
copy_bytes_to_plane(img_bytes, frame.planes[0], bytes_per_pixel, flip_horizontal, flip_vertical)
return frame
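
With the rewritten from_bytes above, the byte count appears to be validated against the format's padded bits per pixel, and planar formats are rejected. A minimal sketch of that size check, assuming this PR's behaviour and PyAV's VideoFormat API; the dimensions are illustrative:

import av

width, height = 640, 480
fmt = av.VideoFormat("rgb24")
bytes_per_pixel = fmt.padded_bits_per_pixel // 8   # 3 for rgb24
expected_size = width * height * bytes_per_pixel   # 921600
frame = av.VideoFrame.from_bytes(bytes(expected_size), width, height, format="rgb24")
assert frame.width == width and frame.height == height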

52 changes: 36 additions & 16 deletions tests/test_videoframe.py
@@ -329,6 +329,24 @@ def test_ndarray_gbrp_align() -> None:
assert frame.format.name == "gbrp"
assertNdarraysEqual(frame.to_ndarray(), array)

def test_ndarray_gbrp9() -> None:
array = numpy.random.randint(0, 512, size=(480, 640, 3), dtype=numpy.uint16)
for format in ("gbrp9be", "gbrp9le"):
frame = VideoFrame.from_ndarray(array, format=format)
assert format in av.video.frame.supported_np_pix_fmts
assert frame.width == 640 and frame.height == 480
assert frame.format.name == format
assertNdarraysEqual(frame.to_ndarray(), array)


def test_ndarray_gbrp9_align() -> None:
array = numpy.random.randint(0, 512, size=(238, 318, 3), dtype=numpy.uint16)
for format in ("gbrp9be", "gbrp9le"):
frame = VideoFrame.from_ndarray(array, format=format)
assert format in av.video.frame.supported_np_pix_fmts
assert frame.width == 318 and frame.height == 238
assert frame.format.name == format
assertNdarraysEqual(frame.to_ndarray(), array)

def test_ndarray_gbrp10() -> None:
array = numpy.random.randint(0, 1024, size=(480, 640, 3), dtype=numpy.uint16)
@@ -568,25 +586,27 @@ def test_ndarray_yuyv422_align() -> None:


def test_ndarray_gray16be() -> None:
array = numpy.random.randint(0, 65536, size=(480, 640), dtype=numpy.uint16)
frame = VideoFrame.from_ndarray(array, format="gray16be")
assert frame.width == 640 and frame.height == 480
assert frame.format.name == "gray16be"
assertNdarraysEqual(frame.to_ndarray(), array)

# check endianness by examining value of first pixel
assertPixelValue16(frame.planes[0], array[0][0], "big")
for bits in (9,10,12,14,16):
array = numpy.random.randint(0, 2**bits, size=(480, 640), dtype=numpy.uint16)
frame = VideoFrame.from_ndarray(array, format=f"gray{bits}be")
assert frame.width == 640 and frame.height == 480
assert frame.format.name == f"gray{bits}be"
assertNdarraysEqual(frame.to_ndarray(), array)

# check endianness by examining value of first pixel
assertPixelValue16(frame.planes[0], array[0][0], "big")


def test_ndarray_gray16le() -> None:
array = numpy.random.randint(0, 65536, size=(480, 640), dtype=numpy.uint16)
frame = VideoFrame.from_ndarray(array, format="gray16le")
assert frame.width == 640 and frame.height == 480
assert frame.format.name == "gray16le"
assertNdarraysEqual(frame.to_ndarray(), array)

# check endianness by examining value of first pixel
assertPixelValue16(frame.planes[0], array[0][0], "little")
for bits in (9,10,12,14,16):
array = numpy.random.randint(0, 2**bits, size=(480, 640), dtype=numpy.uint16)
frame = VideoFrame.from_ndarray(array, format=f"gray{bits}le")
assert frame.width == 640 and frame.height == 480
assert frame.format.name == f"gray{bits}le"
assertNdarraysEqual(frame.to_ndarray(), array)

# check endianness by examining value of first pixel
assertPixelValue16(frame.planes[0], array[0][0], "little")


def test_ndarray_rgb48be() -> None: