diff --git a/av/video/frame.pyi b/av/video/frame.pyi index bba60cc5d..8b76c4da3 100644 --- a/av/video/frame.pyi +++ b/av/video/frame.pyi @@ -79,7 +79,7 @@ class VideoFrame(Frame): data: bytes, width: int, height: int, - format: str = "rgba", + format: str = "rgb24", flip_horizontal: bool = False, flip_vertical: bool = False, ) -> VideoFrame: ... diff --git a/av/video/frame.pyx b/av/video/frame.pyx index 04a14fd8b..4d2c55f79 100644 --- a/av/video/frame.pyx +++ b/av/video/frame.pyx @@ -17,11 +17,11 @@ supported_np_pix_fmts = { "abgr", "argb", "bayer_bggr16be", "bayer_bggr16le", "bayer_bggr8", "bayer_gbrg16be", "bayer_gbrg16le", "bayer_gbrg8", "bayer_grbg16be", "bayer_grbg16le", "bayer_grbg8", "bayer_rggb16be", "bayer_rggb16le", "bayer_rggb8", "bgr24", "bgr48be", "bgr48le", "bgr8", "bgra", "bgra64be", "bgra64le", - "gbrapf32be", "gbrapf32le", "gbrp", "gbrp10be", "gbrp10le", "gbrp12be", "gbrp12le", - "gbrp14be", "gbrp14le", "gbrp16be", "gbrp16le", "gbrpf32be", "gbrpf32le", "gray", - "gray16be", "gray16le", "gray8", "grayf32be", "grayf32le", "nv12", "pal8", "rgb24", - "rgb48be", "rgb48le", "rgb8", "rgba", "rgba64be", "rgba64le", "yuv420p", - "yuv420p10le", "yuv422p10le", "yuv444p", "yuv444p16be", "yuv444p16le", "yuva444p16be", + "gbrap10be", "gbrap10le", "gbrap12be", "gbrap12le","gbrap14be", "gbrap14le", "gbrap16be", "gbrap16le", "gbrapf32be", "gbrapf32le", + "gbrp", "gbrp9be", "gbrp9le", "gbrp10be", "gbrp10le", "gbrp12be", "gbrp12le", "gbrp14be", "gbrp14le", "gbrp16be", "gbrp16le", "gbrpf32be", "gbrpf32le", + "gray", "gray8", "gray9be", "gray9le", "gray10be", "gray10le", "gray12be", "gray12le", "gray14be", "gray14le", "gray16be", "gray16le", "grayf32be", "grayf32le", + "nv12", "pal8", "rgb24", "rgb48be", "rgb48le", "rgb8", "rgba", "rgba64be", "rgba64le", + "yuv420p", "yuv420p10le", "yuv422p10le", "yuv444p", "yuv444p16be", "yuv444p16le", "yuva444p16be", "yuva444p16le", "yuvj420p", "yuvj444p", "yuyv422", } @@ -346,9 +346,21 @@ cdef class VideoFrame(Frame): "bgr48le": (6, "uint16"), "bgr8": (1, "uint8"), "bgra": (4, "uint8"), + "bgra64be": (8, "uint16"), + "bgra64le": (8, "uint16"), + "gbrap10be": (2, "uint16"), + "gbrap10le": (2, "uint16"), + "gbrap12be": (2, "uint16"), + "gbrap12le": (2, "uint16"), + "gbrap14be": (2, "uint16"), + "gbrap14le": (2, "uint16"), + "gbrap16be": (2, "uint16"), + "gbrap16le": (2, "uint16"), "gbrapf32be": (4, "float32"), "gbrapf32le": (4, "float32"), "gbrp": (1, "uint8"), + "gbrp9be": (2, "uint16"), + "gbrp9le": (2, "uint16"), "gbrp10be": (2, "uint16"), "gbrp10le": (2, "uint16"), "gbrp12be": (2, "uint16"), @@ -360,9 +372,17 @@ cdef class VideoFrame(Frame): "gbrpf32be": (4, "float32"), "gbrpf32le": (4, "float32"), "gray": (1, "uint8"), + "gray8": (1, "uint8"), + "gray9be": (2, "uint16"), + "gray9le": (2, "uint16"), + "gray10be": (2, "uint16"), + "gray10le": (2, "uint16"), + "gray12be": (2, "uint16"), + "gray12le": (2, "uint16"), + "gray14be": (2, "uint16"), + "gray14le": (2, "uint16"), "gray16be": (2, "uint16"), "gray16le": (2, "uint16"), - "gray8": (1, "uint8"), "grayf32be": (4, "float32"), "grayf32le": (4, "float32"), "rgb24": (3, "uint8"), @@ -372,8 +392,6 @@ cdef class VideoFrame(Frame): "rgba": (4, "uint8"), "rgba64be": (8, "uint16"), "rgba64le": (8, "uint16"), - "bgra64be": (8, "uint16"), - "bgra64le": (8, "uint16"), "yuv444p": (1, "uint8"), "yuv444p16be": (2, "uint16"), "yuv444p16le": (2, "uint16"), @@ -461,12 +479,18 @@ cdef class VideoFrame(Frame): return frame @staticmethod - def from_numpy_buffer(array, format="rgb24", width=0): - # Usually the width of the array is the same as the width of the image. But sometimes - # this is not possible, for example with yuv420p images that have padding. These are - # awkward because the UV rows at the bottom have padding bytes in the middle of the - # row as well as at the end. To cope with these, callers need to be able to pass the - # actual width to us. + def from_numpy_buffer(array, format="rgb24", width:int=0): + """ + Construct a frame from a numpy buffer. + + :param int width: optional width of actual image, if different from the array width. + + .. note:: For formats where width of the array is not the same as the width of the image, + for example with yuv420p images the UV rows at the bottom have padding bytes in the middle of the + row as well as at the end. To cope with these, callers need to be able to pass the actual width. + """ + import numpy as np + height = array.shape[0] if not width: width = array.shape[1] @@ -495,12 +519,43 @@ cdef class VideoFrame(Frame): if array.strides[1:] != (8, 2): raise ValueError("provided array does not have C_CONTIGUOUS rows") linesizes = (array.strides[0], ) + elif format in {"gbrp"}: + check_ndarray(array, "uint8", 3) + check_ndarray_shape(array, array.shape[2] == 3) + if array.strides[1:] != (3, 1): + raise ValueError("provided array does not have C_CONTIGUOUS rows") + linesizes = (array.strides[0] // 3, array.strides[0] // 3, array.strides[0] // 3, ) + elif format in {"gbrp9be", "gbrp9le", "gbrp10be", "gbrp10le", "gbrp12be", "gbrp12le", "gbrp14be", "gbrp14le", "gbrp16be", "gbrp16le"}: + check_ndarray(array, "uint16", 3) + check_ndarray_shape(array, array.shape[2] == 3) + if array.strides[1:] != (6, 2): + raise ValueError("provided array does not have C_CONTIGUOUS rows") + linesizes = (array.strides[0] // 3, array.strides[0] // 3, array.strides[0] // 3, ) + elif format in {"gbrpf32be", "gbrpf32le"}: + check_ndarray(array, "float32", 3) + check_ndarray_shape(array, array.shape[2] == 3) + if array.strides[1:] != (12, 4): + raise ValueError("provided array does not have C_CONTIGUOUS rows") + linesizes = (array.strides[0] // 3, array.strides[0] // 3, array.strides[0] // 3, ) + elif format in {"gbrap10be", "gbrap10le", "gbrap12be", "gbrap12le", "gbrap14be", "gbrap14le", "gbrap16be", "gbrap16le"}: + check_ndarray(array, "uint16", 3) + check_ndarray_shape(array, array.shape[2] == 4) + if array.strides[1:] != (8, 2): + raise ValueError("provided array does not have C_CONTIGUOUS rows") + linesizes = (array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, ) + elif format in {"gbrapf32be", "gbrapf32le"}: + check_ndarray(array, "float32", 3) + check_ndarray_shape(array, array.shape[2] == 4) + if array.strides[1:] != (16, 4): + raise ValueError("provided array does not have C_CONTIGUOUS rows") + linesizes = (array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, ) elif format in {"gray", "gray8", "rgb8", "bgr8","bayer_bggr8", "bayer_rggb8", "bayer_gbrg8", "bayer_grbg8"}: check_ndarray(array, "uint8", 2) if array.strides[1] != 1: raise ValueError("provided array does not have C_CONTIGUOUS rows") linesizes = (array.strides[0], ) - elif format in {"gray16le", "gray16be", "bayer_rggb16le", "bayer_gbrg16le", "bayer_grbg16le","bayer_bggr16be", "bayer_rggb16be", "bayer_gbrg16be", "bayer_grbg16be"}: + elif format in {"gray9be", "gray9le", "gray10be", "gray10le", "gray12be", "gray12le", "gray14be", "gray14le", "gray16be", "gray16le", + "bayer_bggr16be", "bayer_bggr16le", "bayer_gbrg16be", "bayer_gbrg16le", "bayer_grbg16be", "bayer_grbg16le", "bayer_rggb16be", "bayer_rggb16le"}: check_ndarray(array, "uint16", 2) if array.strides[1] != 2: raise ValueError("provided array does not have C_CONTIGUOUS rows") @@ -525,7 +580,11 @@ cdef class VideoFrame(Frame): linesizes = (array.strides[0], array.strides[0]) else: raise ValueError(f"Conversion from numpy array with format `{format}` is not yet supported") - + + if format.startswith("gbrap"): # rgba -> gbra + array = np.ascontiguousarray(np.moveaxis(array[..., [1, 2, 0, 3]], -1, 0)) + elif format.startswith("gbrp"): # rgb -> gbr + array = np.ascontiguousarray(np.moveaxis(array[..., [1, 2, 0]], -1, 0)) frame = alloc_video_frame() frame._image_fill_pointers_numpy(array, width, height, linesizes, format) return frame @@ -600,45 +659,63 @@ cdef class VideoFrame(Frame): # case layers are concatenated channels, itemsize, dtype = { - "yuv444p": (3, 1, "uint8"), - "yuvj444p": (3, 1, "uint8"), + "bayer_bggr8": (1, 1, "uint8"), + "bayer_rggb8": (1, 1, "uint8"), + "bayer_grbg8": (1, 1, "uint8"), + "bayer_gbrg8": (1, 1, "uint8"), + "bayer_bggr16be": (1, 2, "uint16"), + "bayer_bggr16le": (1, 2, "uint16"), + "bayer_rggb16be": (1, 2, "uint16"), + "bayer_rggb16le": (1, 2, "uint16"), + "bayer_grbg16be": (1, 2, "uint16"), + "bayer_grbg16le": (1, 2, "uint16"), + "bayer_gbrg16be": (1, 2, "uint16"), + "bayer_gbrg16le": (1, 2, "uint16"), + "bgr8": (1, 1, "uint8"), + "gbrap10be": (4, 2, "uint16"), + "gbrap10le": (4, 2, "uint16"), + "gbrap12be": (4, 2, "uint16"), + "gbrap12le": (4, 2, "uint16"), + "gbrap14be": (4, 2, "uint16"), + "gbrap14le": (4, 2, "uint16"), + "gbrap16be": (4, 2, "uint16"), + "gbrap16le": (4, 2, "uint16"), + "gbrapf32be": (4, 4, "float32"), + "gbrapf32le": (4, 4, "float32"), "gbrp": (3, 1, "uint8"), + "gbrp9be": (3, 2, "uint16"), + "gbrp9le": (3, 2, "uint16"), "gbrp10be": (3, 2, "uint16"), - "gbrp12be": (3, 2, "uint16"), - "gbrp14be": (3, 2, "uint16"), - "gbrp16be": (3, 2, "uint16"), "gbrp10le": (3, 2, "uint16"), + "gbrp12be": (3, 2, "uint16"), "gbrp12le": (3, 2, "uint16"), + "gbrp14be": (3, 2, "uint16"), "gbrp14le": (3, 2, "uint16"), + "gbrp16be": (3, 2, "uint16"), "gbrp16le": (3, 2, "uint16"), "gbrpf32be": (3, 4, "float32"), "gbrpf32le": (3, 4, "float32"), "gray": (1, 1, "uint8"), "gray8": (1, 1, "uint8"), - "rgb8": (1, 1, "uint8"), - "bgr8": (1, 1, "uint8"), + "gray9be": (1, 2, "uint16"), + "gray9le": (1, 2, "uint16"), + "gray10be": (1, 2, "uint16"), + "gray10le": (1, 2, "uint16"), + "gray12be": (1, 2, "uint16"), + "gray12le": (1, 2, "uint16"), + "gray14be": (1, 2, "uint16"), + "gray14le": (1, 2, "uint16"), "gray16be": (1, 2, "uint16"), "gray16le": (1, 2, "uint16"), "grayf32be": (1, 4, "float32"), "grayf32le": (1, 4, "float32"), - "gbrapf32be": (4, 4, "float32"), - "gbrapf32le": (4, 4, "float32"), + "rgb8": (1, 1, "uint8"), + "yuv444p": (3, 1, "uint8"), + "yuvj444p": (3, 1, "uint8"), "yuv444p16be": (3, 2, "uint16"), "yuv444p16le": (3, 2, "uint16"), "yuva444p16be": (4, 2, "uint16"), - "yuva444p16le": (4, 2, "uint16"), - "bayer_bggr8": (1, 1, "uint8"), - "bayer_rggb8": (1, 1, "uint8"), - "bayer_grbg8": (1, 1, "uint8"), - "bayer_gbrg8": (1, 1, "uint8"), - "bayer_bggr16be": (1, 2, "uint16"), - "bayer_bggr16le": (1, 2, "uint16"), - "bayer_rggb16be": (1, 2, "uint16"), - "bayer_rggb16le": (1, 2, "uint16"), - "bayer_grbg16be": (1, 2, "uint16"), - "bayer_grbg16le": (1, 2, "uint16"), - "bayer_gbrg16be": (1, 2, "uint16"), - "bayer_gbrg16le": (1, 2, "uint16"), + "yuva444p16le": (4, 2, "uint16"), }.get(format, (None, None, None)) if channels is not None: if array.ndim == 2: # (height, width) -> (height, width, 1) @@ -768,12 +845,25 @@ cdef class VideoFrame(Frame): return frame @staticmethod - def from_bytes(img_bytes: bytes, width: int, height: int, format="rgba", flip_horizontal=False, flip_vertical=False): + def from_bytes(img_bytes: bytes, width: int, height: int, format="rgb24", flip_horizontal=False, flip_vertical=False): + """ + Construct a frame from raw bytes. + + :param img_bytes: Raw image data. + :param width: Frame width. + :param height: Frame height. + :param format: Pixel format, e.g. "rgb24". + :param flip_horizontal: If True, flip image horizontally. + :param flip_vertical: If True, flip image vertically. + """ frame = VideoFrame(width, height, format) - if format == "rgba": - copy_bytes_to_plane(img_bytes, frame.planes[0], 4, flip_horizontal, flip_vertical) - elif format in ("bayer_bggr8", "bayer_rggb8", "bayer_gbrg8", "bayer_grbg8","bayer_bggr16le", "bayer_rggb16le", "bayer_gbrg16le", "bayer_grbg16le","bayer_bggr16be", "bayer_rggb16be", "bayer_gbrg16be", "bayer_grbg16be"): - copy_bytes_to_plane(img_bytes, frame.planes[0], 1 if format.endswith("8") else 2, flip_horizontal, flip_vertical) + if frame.format.is_planar: + raise NotImplementedError(f"Conversion from bytes with format `{format}` is not yet supported") else: - raise NotImplementedError(f"Format '{format}' is not supported.") + bytes_per_pixel = frame.format.padded_bits_per_pixel // 8 + expected_size = width * height * bytes_per_pixel + if len(img_bytes) != expected_size: + raise ValueError(f"Expected {expected_size} bytes, got {len(img_bytes)}") + copy_bytes_to_plane(img_bytes, frame.planes[0], bytes_per_pixel, flip_horizontal, flip_vertical) return frame + diff --git a/tests/test_videoframe.py b/tests/test_videoframe.py index 26549b31b..32946f004 100644 --- a/tests/test_videoframe.py +++ b/tests/test_videoframe.py @@ -329,6 +329,24 @@ def test_ndarray_gbrp_align() -> None: assert frame.format.name == "gbrp" assertNdarraysEqual(frame.to_ndarray(), array) +def test_ndarray_gbrp9() -> None: + array = numpy.random.randint(0, 512, size=(480, 640, 3), dtype=numpy.uint16) + for format in ("gbrp9be", "gbrp9le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert format in av.video.frame.supported_np_pix_fmts + assert frame.width == 640 and frame.height == 480 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) + + +def test_ndarray_gbrp9_align() -> None: + array = numpy.random.randint(0, 512, size=(238, 318, 3), dtype=numpy.uint16) + for format in ("gbrp9be", "gbrp9le"): + frame = VideoFrame.from_ndarray(array, format=format) + assert format in av.video.frame.supported_np_pix_fmts + assert frame.width == 318 and frame.height == 238 + assert frame.format.name == format + assertNdarraysEqual(frame.to_ndarray(), array) def test_ndarray_gbrp10() -> None: array = numpy.random.randint(0, 1024, size=(480, 640, 3), dtype=numpy.uint16) @@ -568,25 +586,27 @@ def test_ndarray_yuyv422_align() -> None: def test_ndarray_gray16be() -> None: - array = numpy.random.randint(0, 65536, size=(480, 640), dtype=numpy.uint16) - frame = VideoFrame.from_ndarray(array, format="gray16be") - assert frame.width == 640 and frame.height == 480 - assert frame.format.name == "gray16be" - assertNdarraysEqual(frame.to_ndarray(), array) - - # check endianness by examining value of first pixel - assertPixelValue16(frame.planes[0], array[0][0], "big") + for bits in (9,10,12,14,16): + array = numpy.random.randint(0, 2**bits, size=(480, 640), dtype=numpy.uint16) + frame = VideoFrame.from_ndarray(array, format=f"gray{bits}be") + assert frame.width == 640 and frame.height == 480 + assert frame.format.name == f"gray{bits}be" + assertNdarraysEqual(frame.to_ndarray(), array) + + # check endianness by examining value of first pixel + assertPixelValue16(frame.planes[0], array[0][0], "big") def test_ndarray_gray16le() -> None: - array = numpy.random.randint(0, 65536, size=(480, 640), dtype=numpy.uint16) - frame = VideoFrame.from_ndarray(array, format="gray16le") - assert frame.width == 640 and frame.height == 480 - assert frame.format.name == "gray16le" - assertNdarraysEqual(frame.to_ndarray(), array) - - # check endianness by examining value of first pixel - assertPixelValue16(frame.planes[0], array[0][0], "little") + for bits in (9,10,12,14,16): + array = numpy.random.randint(0, 2**bits, size=(480, 640), dtype=numpy.uint16) + frame = VideoFrame.from_ndarray(array, format=f"gray{bits}le") + assert frame.width == 640 and frame.height == 480 + assert frame.format.name == f"gray{bits}le" + assertNdarraysEqual(frame.to_ndarray(), array) + + # check endianness by examining value of first pixel + assertPixelValue16(frame.planes[0], array[0][0], "little") def test_ndarray_rgb48be() -> None: