add #1882 (Closed)

2 changes: 1 addition & 1 deletion av/video/frame.pyi
@@ -79,7 +79,7 @@ class VideoFrame(Frame):
data: bytes,
width: int,
height: int,
format: str = "rgba",
format: str = "rgb24",
flip_horizontal: bool = False,
flip_vertical: bool = False,
) -> VideoFrame: ...
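
The stub above changes the default format of VideoFrame.from_bytes from "rgba" to "rgb24". A minimal sketch of calling it with the new default, assuming PyAV with this PR applied; the 2x2 pixel values are illustrative only:

import av

width, height = 2, 2
rgb24_bytes = bytes(
    [255, 0, 0,   0, 255, 0,     # row 0: red, green
     0, 0, 255,   255, 255, 0]   # row 1: blue, yellow
)
frame = av.VideoFrame.from_bytes(rgb24_bytes, width, height)  # format defaults to "rgb24"
print(frame.format.name, frame.width, frame.height)  # rgb24 2 2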
178 changes: 134 additions & 44 deletions av/video/frame.pyx
@@ -17,11 +17,11 @@ supported_np_pix_fmts = {
"abgr", "argb", "bayer_bggr16be", "bayer_bggr16le", "bayer_bggr8", "bayer_gbrg16be",
"bayer_gbrg16le", "bayer_gbrg8", "bayer_grbg16be", "bayer_grbg16le", "bayer_grbg8",
"bayer_rggb16be", "bayer_rggb16le", "bayer_rggb8", "bgr24", "bgr48be", "bgr48le", "bgr8", "bgra", "bgra64be", "bgra64le",
"gbrapf32be", "gbrapf32le", "gbrp", "gbrp10be", "gbrp10le", "gbrp12be", "gbrp12le",
"gbrp14be", "gbrp14le", "gbrp16be", "gbrp16le", "gbrpf32be", "gbrpf32le", "gray",
"gray16be", "gray16le", "gray8", "grayf32be", "grayf32le", "nv12", "pal8", "rgb24",
"rgb48be", "rgb48le", "rgb8", "rgba", "rgba64be", "rgba64le", "yuv420p",
"yuv420p10le", "yuv422p10le", "yuv444p", "yuv444p16be", "yuv444p16le", "yuva444p16be",
"gbrap10be", "gbrap10le", "gbrap12be", "gbrap12le","gbrap14be", "gbrap14le", "gbrap16be", "gbrap16le", "gbrapf32be", "gbrapf32le",
"gbrp", "gbrp9be", "gbrp9le", "gbrp10be", "gbrp10le", "gbrp12be", "gbrp12le", "gbrp14be", "gbrp14le", "gbrp16be", "gbrp16le", "gbrpf32be", "gbrpf32le",
"gray", "gray8", "gray9be", "gray9le", "gray10be", "gray10le", "gray12be", "gray12le", "gray14be", "gray14le", "gray16be", "gray16le", "grayf32be", "grayf32le",
"nv12", "pal8", "rgb24", "rgb48be", "rgb48le", "rgb8", "rgba", "rgba64be", "rgba64le",
"yuv420p", "yuv420p10le", "yuv422p10le", "yuv444p", "yuv444p16be", "yuv444p16le", "yuva444p16be",
"yuva444p16le", "yuvj420p", "yuvj444p", "yuyv422",
}
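
The set above gains additional planar GBR(A) and gray bit depths. A minimal round-trip sketch for one listed format, assuming PyAV with this PR applied; it follows the same pattern as the existing tests:

import av
import numpy as np

assert "gbrp10le" in av.video.frame.supported_np_pix_fmts

# 10-bit planar GBR is fed as an RGB-ordered (height, width, 3) uint16 array.
array = np.random.randint(0, 1024, size=(480, 640, 3), dtype=np.uint16)
frame = av.VideoFrame.from_ndarray(array, format="gbrp10le")
assert (frame.to_ndarray() == array).all()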

@@ -346,9 +346,21 @@ cdef class VideoFrame(Frame):
"bgr48le": (6, "uint16"),
"bgr8": (1, "uint8"),
"bgra": (4, "uint8"),
"bgra64be": (8, "uint16"),
"bgra64le": (8, "uint16"),
"gbrap10be": (2, "uint16"),
"gbrap10le": (2, "uint16"),
"gbrap12be": (2, "uint16"),
"gbrap12le": (2, "uint16"),
"gbrap14be": (2, "uint16"),
"gbrap14le": (2, "uint16"),
"gbrap16be": (2, "uint16"),
"gbrap16le": (2, "uint16"),
"gbrapf32be": (4, "float32"),
"gbrapf32le": (4, "float32"),
"gbrp": (1, "uint8"),
"gbrp9be": (2, "uint16"),
"gbrp9le": (2, "uint16"),
"gbrp10be": (2, "uint16"),
"gbrp10le": (2, "uint16"),
"gbrp12be": (2, "uint16"),
@@ -360,9 +372,17 @@ cdef class VideoFrame(Frame):
"gbrpf32be": (4, "float32"),
"gbrpf32le": (4, "float32"),
"gray": (1, "uint8"),
"gray8": (1, "uint8"),
"gray9be": (2, "uint16"),
"gray9le": (2, "uint16"),
"gray10be": (2, "uint16"),
"gray10le": (2, "uint16"),
"gray12be": (2, "uint16"),
"gray12le": (2, "uint16"),
"gray14be": (2, "uint16"),
"gray14le": (2, "uint16"),
"gray16be": (2, "uint16"),
"gray16le": (2, "uint16"),
"gray8": (1, "uint8"),
"grayf32be": (4, "float32"),
"grayf32le": (4, "float32"),
"rgb24": (3, "uint8"),
@@ -372,8 +392,6 @@ cdef class VideoFrame(Frame):
"rgba": (4, "uint8"),
"rgba64be": (8, "uint16"),
"rgba64le": (8, "uint16"),
"bgra64be": (8, "uint16"),
"bgra64le": (8, "uint16"),
"yuv444p": (1, "uint8"),
"yuv444p16be": (2, "uint16"),
"yuv444p16le": (2, "uint16"),
@@ -461,12 +479,18 @@ cdef class VideoFrame(Frame):
return frame

@staticmethod
def from_numpy_buffer(array, format="rgb24", width=0):
# Usually the width of the array is the same as the width of the image. But sometimes
# this is not possible, for example with yuv420p images that have padding. These are
# awkward because the UV rows at the bottom have padding bytes in the middle of the
# row as well as at the end. To cope with these, callers need to be able to pass the
# actual width to us.
def from_numpy_buffer(array, format="rgb24", width:int=0):
"""
Construct a frame from a numpy buffer.

:param int width: optional width of actual image, if different from the array width.

.. note:: For formats where width of the array is not the same as the width of the image,
for example with yuv420p images the UV rows at the bottom have padding bytes in the middle of the
row as well as at the end. To cope with these, callers need to be able to pass the actual width.
"""
import numpy as np

height = array.shape[0]
if not width:
width = array.shape[1]
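
from_numpy_buffer wraps the caller's buffer rather than copying it, which is why the stride checks below are strict. A minimal usage sketch, assuming the zero-copy behaviour implied by the _image_fill_pointers_numpy call at the end of this function; the packed "rgb24" path is used here (planar "gbrp*" input is repacked first, so it does not share memory with the caller's array):

import av
import numpy as np

array = np.zeros((480, 640, 3), dtype=np.uint8)  # (height, width, channels), C-contiguous
frame = av.VideoFrame.from_numpy_buffer(array, format="rgb24")

# The frame's planes point into `array`, so later writes to the array
# are visible through the frame.
array[:] = 255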
@@ -495,12 +519,43 @@ cdef class VideoFrame(Frame):
if array.strides[1:] != (8, 2):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0], )
elif format in {"gbrp"}:
check_ndarray(array, "uint8", 3)
check_ndarray_shape(array, array.shape[2] == 3)
if array.strides[1:] != (3, 1):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0] // 3, array.strides[0] // 3, array.strides[0] // 3, )
elif format in {"gbrp9be", "gbrp9le", "gbrp10be", "gbrp10le", "gbrp12be", "gbrp12le", "gbrp14be", "gbrp14le", "gbrp16be", "gbrp16le"}:
check_ndarray(array, "uint16", 3)
check_ndarray_shape(array, array.shape[2] == 3)
if array.strides[1:] != (6, 2):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0] // 3, array.strides[0] // 3, array.strides[0] // 3, )
elif format in {"gbrpf32be", "gbrpf32le"}:
check_ndarray(array, "float32", 3)
check_ndarray_shape(array, array.shape[2] == 3)
if array.strides[1:] != (12, 4):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0] // 3, array.strides[0] // 3, array.strides[0] // 3, )
elif format in {"gbrap10be", "gbrap10le", "gbrap12be", "gbrap12le", "gbrap14be", "gbrap14le", "gbrap16be", "gbrap16le"}:
check_ndarray(array, "uint16", 3)
check_ndarray_shape(array, array.shape[2] == 4)
if array.strides[1:] != (8, 2):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, )
elif format in {"gbrapf32be", "gbrapf32le"}:
check_ndarray(array, "float32", 3)
check_ndarray_shape(array, array.shape[2] == 4)
if array.strides[1:] != (16, 4):
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, array.strides[0] // 4, )
elif format in {"gray", "gray8", "rgb8", "bgr8","bayer_bggr8", "bayer_rggb8", "bayer_gbrg8", "bayer_grbg8"}:
check_ndarray(array, "uint8", 2)
if array.strides[1] != 1:
raise ValueError("provided array does not have C_CONTIGUOUS rows")
linesizes = (array.strides[0], )
elif format in {"gray16le", "gray16be", "bayer_rggb16le", "bayer_gbrg16le", "bayer_grbg16le","bayer_bggr16be", "bayer_rggb16be", "bayer_gbrg16be", "bayer_grbg16be"}:
elif format in {"gray9be", "gray9le", "gray10be", "gray10le", "gray12be", "gray12le", "gray14be", "gray14le", "gray16be", "gray16le",
"bayer_bggr16be", "bayer_bggr16le", "bayer_gbrg16be", "bayer_gbrg16le", "bayer_grbg16be", "bayer_grbg16le", "bayer_rggb16be", "bayer_rggb16le"}:
check_ndarray(array, "uint16", 2)
if array.strides[1] != 2:
raise ValueError("provided array does not have C_CONTIGUOUS rows")
@@ -525,7 +580,11 @@ cdef class VideoFrame(Frame):
linesizes = (array.strides[0], array.strides[0])
else:
raise ValueError(f"Conversion from numpy array with format `{format}` is not yet supported")


if format.startswith("gbrap"): # rgba -> gbra
array = np.ascontiguousarray(np.moveaxis(array[..., [1, 2, 0, 3]], -1, 0))
elif format.startswith("gbrp"): # rgb -> gbr
array = np.ascontiguousarray(np.moveaxis(array[..., [1, 2, 0]], -1, 0))
frame = alloc_video_frame()
frame._image_fill_pointers_numpy(array, width, height, linesizes, format)
return frame
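
The reordering above means callers keep passing RGB(A)-ordered interleaved arrays even though FFmpeg stores these formats as G, B, R(, A) planes. A small numpy-only sketch of the same remap, with illustrative shapes:

import numpy as np

rgb = np.random.randint(0, 256, size=(4, 6, 3), dtype=np.uint8)  # (height, width, 3), RGB order
gbr_planar = np.ascontiguousarray(np.moveaxis(rgb[..., [1, 2, 0]], -1, 0))
print(gbr_planar.shape)  # (3, 4, 6): one contiguous plane each for G, B, R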
@@ -600,45 +659,63 @@ cdef class VideoFrame(Frame):

# case layers are concatenated
channels, itemsize, dtype = {
"yuv444p": (3, 1, "uint8"),
"yuvj444p": (3, 1, "uint8"),
"bayer_bggr8": (1, 1, "uint8"),
"bayer_rggb8": (1, 1, "uint8"),
"bayer_grbg8": (1, 1, "uint8"),
"bayer_gbrg8": (1, 1, "uint8"),
"bayer_bggr16be": (1, 2, "uint16"),
"bayer_bggr16le": (1, 2, "uint16"),
"bayer_rggb16be": (1, 2, "uint16"),
"bayer_rggb16le": (1, 2, "uint16"),
"bayer_grbg16be": (1, 2, "uint16"),
"bayer_grbg16le": (1, 2, "uint16"),
"bayer_gbrg16be": (1, 2, "uint16"),
"bayer_gbrg16le": (1, 2, "uint16"),
"bgr8": (1, 1, "uint8"),
"gbrap10be": (4, 2, "uint16"),
"gbrap10le": (4, 2, "uint16"),
"gbrap12be": (4, 2, "uint16"),
"gbrap12le": (4, 2, "uint16"),
"gbrap14be": (4, 2, "uint16"),
"gbrap14le": (4, 2, "uint16"),
"gbrap16be": (4, 2, "uint16"),
"gbrap16le": (4, 2, "uint16"),
"gbrapf32be": (4, 4, "float32"),
"gbrapf32le": (4, 4, "float32"),
"gbrp": (3, 1, "uint8"),
"gbrp9be": (3, 2, "uint16"),
"gbrp9le": (3, 2, "uint16"),
"gbrp10be": (3, 2, "uint16"),
"gbrp12be": (3, 2, "uint16"),
"gbrp14be": (3, 2, "uint16"),
"gbrp16be": (3, 2, "uint16"),
"gbrp10le": (3, 2, "uint16"),
"gbrp12be": (3, 2, "uint16"),
"gbrp12le": (3, 2, "uint16"),
"gbrp14be": (3, 2, "uint16"),
"gbrp14le": (3, 2, "uint16"),
"gbrp16be": (3, 2, "uint16"),
"gbrp16le": (3, 2, "uint16"),
"gbrpf32be": (3, 4, "float32"),
"gbrpf32le": (3, 4, "float32"),
"gray": (1, 1, "uint8"),
"gray8": (1, 1, "uint8"),
"rgb8": (1, 1, "uint8"),
"bgr8": (1, 1, "uint8"),
"gray9be": (1, 2, "uint16"),
"gray9le": (1, 2, "uint16"),
"gray10be": (1, 2, "uint16"),
"gray10le": (1, 2, "uint16"),
"gray12be": (1, 2, "uint16"),
"gray12le": (1, 2, "uint16"),
"gray14be": (1, 2, "uint16"),
"gray14le": (1, 2, "uint16"),
"gray16be": (1, 2, "uint16"),
"gray16le": (1, 2, "uint16"),
"grayf32be": (1, 4, "float32"),
"grayf32le": (1, 4, "float32"),
"gbrapf32be": (4, 4, "float32"),
"gbrapf32le": (4, 4, "float32"),
"rgb8": (1, 1, "uint8"),
"yuv444p": (3, 1, "uint8"),
"yuvj444p": (3, 1, "uint8"),
"yuv444p16be": (3, 2, "uint16"),
"yuv444p16le": (3, 2, "uint16"),
"yuva444p16be": (4, 2, "uint16"),
"yuva444p16le": (4, 2, "uint16"),
"bayer_bggr8": (1, 1, "uint8"),
"bayer_rggb8": (1, 1, "uint8"),
"bayer_grbg8": (1, 1, "uint8"),
"bayer_gbrg8": (1, 1, "uint8"),
"bayer_bggr16be": (1, 2, "uint16"),
"bayer_bggr16le": (1, 2, "uint16"),
"bayer_rggb16be": (1, 2, "uint16"),
"bayer_rggb16le": (1, 2, "uint16"),
"bayer_grbg16be": (1, 2, "uint16"),
"bayer_grbg16le": (1, 2, "uint16"),
"bayer_gbrg16be": (1, 2, "uint16"),
"bayer_gbrg16le": (1, 2, "uint16"),
"yuva444p16le": (4, 2, "uint16"),
}.get(format, (None, None, None))
if channels is not None:
if array.ndim == 2: # (height, width) -> (height, width, 1)
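
The mapping above drives from_ndarray for formats whose layers are simply concatenated; a 2-D array is promoted to a single channel. A minimal sketch for one of the newly added high-bit-depth gray formats, mirroring the updated tests further down:

import av
import numpy as np

array = np.random.randint(0, 2**10, size=(480, 640), dtype=np.uint16)
frame = av.VideoFrame.from_ndarray(array, format="gray10le")
assert frame.format.name == "gray10le"
assert (frame.to_ndarray() == array).all()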
@@ -768,12 +845,25 @@ cdef class VideoFrame(Frame):
return frame

@staticmethod
def from_bytes(img_bytes: bytes, width: int, height: int, format="rgba", flip_horizontal=False, flip_vertical=False):
def from_bytes(img_bytes: bytes, width: int, height: int, format="rgb24", flip_horizontal=False, flip_vertical=False):
"""
Construct a frame from raw bytes.

:param img_bytes: Raw image data.
:param width: Frame width.
:param height: Frame height.
:param format: Pixel format, e.g. "rgb24".
:param flip_horizontal: If True, flip image horizontally.
:param flip_vertical: If True, flip image vertically.
"""
frame = VideoFrame(width, height, format)
if format == "rgba":
copy_bytes_to_plane(img_bytes, frame.planes[0], 4, flip_horizontal, flip_vertical)
elif format in ("bayer_bggr8", "bayer_rggb8", "bayer_gbrg8", "bayer_grbg8","bayer_bggr16le", "bayer_rggb16le", "bayer_gbrg16le", "bayer_grbg16le","bayer_bggr16be", "bayer_rggb16be", "bayer_gbrg16be", "bayer_grbg16be"):
copy_bytes_to_plane(img_bytes, frame.planes[0], 1 if format.endswith("8") else 2, flip_horizontal, flip_vertical)
if frame.format.is_planar:
raise NotImplementedError(f"Conversion from bytes with format `{format}` is not yet supported")
else:
raise NotImplementedError(f"Format '{format}' is not supported.")
bytes_per_pixel = frame.format.padded_bits_per_pixel // 8
expected_size = width * height * bytes_per_pixel
if len(img_bytes) != expected_size:
raise ValueError(f"Expected {expected_size} bytes, got {len(img_bytes)}")
copy_bytes_to_plane(img_bytes, frame.planes[0], bytes_per_pixel, flip_horizontal, flip_vertical)
return frame
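
With the rewritten from_bytes above, the byte count appears to be validated against the format's padded bits per pixel, and planar formats are rejected. A minimal sketch of that size check, assuming this PR's behaviour and PyAV's VideoFormat API; the dimensions are illustrative:

import av

width, height = 640, 480
fmt = av.VideoFormat("rgb24")
bytes_per_pixel = fmt.padded_bits_per_pixel // 8   # 3 for rgb24
expected_size = width * height * bytes_per_pixel   # 921600
frame = av.VideoFrame.from_bytes(bytes(expected_size), width, height, format="rgb24")
assert frame.width == width and frame.height == height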

52 changes: 36 additions & 16 deletions tests/test_videoframe.py
@@ -329,6 +329,24 @@ def test_ndarray_gbrp_align() -> None:
assert frame.format.name == "gbrp"
assertNdarraysEqual(frame.to_ndarray(), array)

def test_ndarray_gbrp9() -> None:
array = numpy.random.randint(0, 512, size=(480, 640, 3), dtype=numpy.uint16)
for format in ("gbrp9be", "gbrp9le"):
frame = VideoFrame.from_ndarray(array, format=format)
assert format in av.video.frame.supported_np_pix_fmts
assert frame.width == 640 and frame.height == 480
assert frame.format.name == format
assertNdarraysEqual(frame.to_ndarray(), array)


def test_ndarray_gbrp9_align() -> None:
array = numpy.random.randint(0, 512, size=(238, 318, 3), dtype=numpy.uint16)
for format in ("gbrp9be", "gbrp9le"):
frame = VideoFrame.from_ndarray(array, format=format)
assert format in av.video.frame.supported_np_pix_fmts
assert frame.width == 318 and frame.height == 238
assert frame.format.name == format
assertNdarraysEqual(frame.to_ndarray(), array)

def test_ndarray_gbrp10() -> None:
array = numpy.random.randint(0, 1024, size=(480, 640, 3), dtype=numpy.uint16)
@@ -568,25 +586,27 @@ def test_ndarray_yuyv422_align() -> None:


def test_ndarray_gray16be() -> None:
array = numpy.random.randint(0, 65536, size=(480, 640), dtype=numpy.uint16)
frame = VideoFrame.from_ndarray(array, format="gray16be")
assert frame.width == 640 and frame.height == 480
assert frame.format.name == "gray16be"
assertNdarraysEqual(frame.to_ndarray(), array)

# check endianness by examining value of first pixel
assertPixelValue16(frame.planes[0], array[0][0], "big")
for bits in (9,10,12,14,16):
array = numpy.random.randint(0, 2**bits, size=(480, 640), dtype=numpy.uint16)
frame = VideoFrame.from_ndarray(array, format=f"gray{bits}be")
assert frame.width == 640 and frame.height == 480
assert frame.format.name == f"gray{bits}be"
assertNdarraysEqual(frame.to_ndarray(), array)

# check endianness by examining value of first pixel
assertPixelValue16(frame.planes[0], array[0][0], "big")


def test_ndarray_gray16le() -> None:
array = numpy.random.randint(0, 65536, size=(480, 640), dtype=numpy.uint16)
frame = VideoFrame.from_ndarray(array, format="gray16le")
assert frame.width == 640 and frame.height == 480
assert frame.format.name == "gray16le"
assertNdarraysEqual(frame.to_ndarray(), array)

# check endianness by examining value of first pixel
assertPixelValue16(frame.planes[0], array[0][0], "little")
for bits in (9,10,12,14,16):
array = numpy.random.randint(0, 2**bits, size=(480, 640), dtype=numpy.uint16)
frame = VideoFrame.from_ndarray(array, format=f"gray{bits}le")
assert frame.width == 640 and frame.height == 480
assert frame.format.name == f"gray{bits}le"
assertNdarraysEqual(frame.to_ndarray(), array)

# check endianness by examining value of first pixel
assertPixelValue16(frame.planes[0], array[0][0], "little")


def test_ndarray_rgb48be() -> None: