How do I only decode audio but mux video from an rtsp source?

## Overview

I want to decode the audio, resample and re-encode it, and then mux it, while muxing the video packets as-is (without decoding them). The goal is to split a stream into multiple MPEG-TS chunks.
The problem I'm facing is that when I resample/encode the audio, the result is all choppy and weird.
If I only mux the audio instead of resampling it, everything is perfect.

Technically I start a new chunk on every keyframe, but a chunk could span any number of frames; as I understand it, it just needs to start on a keyframe. As it turns out, the stream in the example below has exactly one keyframe every 2 seconds, so I save 2-second chunks.

This code can be run directly by simply setting the `output_path` variable at the top to a valid path.
This code only creates 5 chunks for demonstration purposes:

```

import io
import attr
import av


# Directory the .ts chunk files are written into.
output_path = "/some/path" # without the ending '/'
# Sample rate (Hz) used for both the resampler and the AAC output stream.
audio_sample_rate = 44100
# RTSP source that is opened and demuxed.
rtsp_url = "rtsp://wowzaec2demo.streamlock.net/vod/mp4:BigBuckBunny_115k.mov"

@attr.s
class Buffer:
    """Bundle of everything that belongs to one output chunk.

    Instances are built by ``create_output``: an in-memory byte buffer, the
    output container opened on top of it, and the output streams added to
    that container.
    """

    # In-memory byte buffer the output container writes into.
    chunk = attr.ib(type=io.BytesIO) 
    # Output container opened on `chunk`.
    container = attr.ib()
    # Output video stream that demuxed video packets are re-assigned to.
    vstream = attr.ib()
    # Output audio stream used for encoding; None when no audio stream was added.
    astream = attr.ib(default=None)

def save_chunk(segment_data, segment_index):
    """Write one finished chunk to ``{output_path}/mpgts_{segment_index}.ts``.

    Args:
        segment_data: In-memory buffer (``io.BytesIO``) holding the chunk bytes.
        segment_index: Number used in the file name of the chunk.
    """
    # The context manager closes the file even if the write fails.
    with open(f"{output_path}/mpgts_{segment_index}.ts", 'wb') as f:
        f.write(segment_data.getvalue())

def create_output(video_stream, audio_steam):
    """Open a fresh in-memory MPEG-TS output and return it as a ``Buffer``.

    Args:
        video_stream: Input video stream used as the template for the output
            video stream.
        audio_steam: Input audio stream, or None. When it is not None an AAC
            output stream at ``audio_sample_rate`` is added.

    Returns:
        A ``Buffer`` holding the byte buffer, the output container and the
        output streams.
    """
    memory_file = io.BytesIO()
    muxer = av.open(memory_file, mode="w", format="mpegts")

    # The audio stream (if any) is added before the video stream.
    audio_out = (
        muxer.add_stream(codec_name="aac", rate=audio_sample_rate)
        if audio_steam is not None
        else None
    )
    video_out = muxer.add_stream(template=video_stream)

    return Buffer(memory_file, muxer, video_out, audio_out)

def _mux_audio_packets(output, a_packets):
    """Assign encoded audio packets to the chunk's audio stream and mux them."""
    for a_packet in a_packets:
        a_packet.stream = output.astream
        print(f"a_packet: pts={str(a_packet.pts)} time_base={str(a_packet.time_base)}, dur: {str(a_packet.duration)}, stream tb: {str(output.astream.time_base)}")
        output.container.mux(a_packet)


def _finish_chunk(output, segment_index):
    """Flush the audio encoder, close the chunk's container and save the chunk."""
    if output.astream is not None:
        # Drain whatever the encoder still buffers; without this the audio at
        # the tail of the chunk never reaches the container.
        _mux_audio_packets(output, output.astream.encode(None))
    output.container.close()
    save_chunk(output.chunk, segment_index)
    output.chunk.close()


def main():
    """Split the RTSP source into MPEG-TS chunks, re-encoding only the audio.

    Video packets are muxed without decoding. Audio packets are decoded,
    resampled, encoded to AAC and muxed. A new chunk is started on every
    video keyframe; the run stops after 5 chunks have been saved or when the
    input ends. Returns early when the source lacks a video or audio stream.
    """
    options = {"rtsp_flags": "prefer_tcp"}
    container = av.open(rtsp_url, options=options)
    try:
        try:
            video_stream = container.streams.video[0]
            audio_stream = container.streams.audio[0]
        except (KeyError, IndexError):
            return

        segment_count = 0
        first_audio_packet = True
        first_audio_pts = 0
        first_video_packet = True
        first_video_pts = 0
        first_video_dts = 0
        output = None
        resampler = av.AudioResampler(audio_stream.format, audio_stream.layout, audio_sample_rate)

        # Iterate a single demux generator; it simply ends when the input does.
        for packet in container.demux(video_stream, audio_stream):
            # Packets without a dts cannot be re-based below, so skip them.
            if packet.dts is None:
                continue

            if packet.stream.type == 'video':
                if first_video_packet:
                    first_video_pts = packet.pts
                    first_video_dts = packet.dts
                    first_video_packet = False
                if packet.is_keyframe:
                    # A keyframe ends the current chunk (if any) and starts the next.
                    if output is not None:
                        _finish_chunk(output, segment_count)
                        output = None
                        segment_count += 1
                    if segment_count >= 5:
                        break
                    output = create_output(video_stream, audio_stream)

                # Re-base timestamps so the first video packet starts at zero.
                packet.pts -= first_video_pts
                packet.dts -= first_video_dts

                if output is not None and output.vstream is not None:
                    print(f"video_packet: {str(packet)}, dur: {str(packet.duration)}")
                    packet.stream = output.vstream
                    output.container.mux(packet)
            else:
                # Audio that arrives before the first keyframe has no chunk to go into.
                if output is None:
                    continue
                if first_audio_packet:
                    first_audio_pts = packet.pts
                    first_audio_packet = False
                packet.pts -= first_audio_pts
                packet.dts -= first_audio_pts
                print(f"audio_packet: pts={str(packet.pts)} time_base={str(packet.time_base)}, dur: {str(packet.duration)}")
                for a_frame in packet.decode():
                    print(f"original a_frame: {str(a_frame)}, samples: {str(a_frame.samples)}, time_base={a_frame.time_base}")
                    resampled = resampler.resample(a_frame)
                    # Depending on the PyAV version, resample() returns a single
                    # frame, a list of frames, or None; normalise to a list.
                    if resampled is None:
                        continue
                    if not isinstance(resampled, list):
                        resampled = [resampled]
                    for sampled_frame in resampled:
                        print(f"resampled sampled_frame: {str(sampled_frame)}, samples: {str(sampled_frame.samples)}, time_base={sampled_frame.time_base}")
                        # NOTE(review): with pts cleared, audio timestamps come from
                        # the encoder, and every chunk gets a new encoder, while
                        # video timestamps keep counting across chunks. This
                        # presumably desynchronises audio and video after the
                        # first chunk - confirm and derive pts from the source.
                        sampled_frame.pts = None
                        _mux_audio_packets(output, output.astream.encode(sampled_frame))

        # The input ended while a chunk was still open: save what was collected.
        if output is not None:
            _finish_chunk(output, segment_count)
    finally:
        # Release the RTSP connection on every exit path.
        container.close()

# Run only when executed as a script, so importing the module does not open
# the RTSP stream.
if __name__ == "__main__":
    main()
```

## Expected behavior
The video should play smoothly without hiccups.


## Actual behavior

The playback is choppy, and not only the audio: the video is affected as well.

## Investigation

I've tried all sorts of stuff:

- Instead of creating my own resampler, passing the decoded frames directly to the `encode` function of the audio output stream, with the single `demux` call over both audio and video (as in the example) split into `decode` for audio and `demux` for video: same result
- Calculate the audio pts manually using the packet duration and incrementing (this is not perfect as video can go ahead of audio if there are any dropped frames): same result
- Calculate the audio pts based off of the last video pts: same result
- Set all pts/dts sent to encode to None: same result
- Set pts/dts to None for resample/encode/mux: same result


## Research

I have done the following:

- [X] Checked the [PyAV documentation](https://pyav.org/docs)
- [X] Searched on [Google](https://www.google.com/search?q=pyav+how+do+I+foo)
- [X] Searched on [Stack Overflow](https://stackoverflow.com/search?q=pyav)
- [X] Looked through [old GitHub issues](https://github.com/PyAV-Org/PyAV/issues?&q=is%3Aissue)
- [X] Asked on [PyAV Gitter](https://gitter.im/PyAV-Org)
- [X] ... and waited 72 hours for a response.


Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

How do I only decode audio but mux video from an rtsp source? #677

Overview

Expected behavior

Actual behavior

Investigation

Research

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

How do I only decode audio but mux video from an rtsp source? #677

Description

Overview

Expected behavior

Actual behavior

Investigation

Research

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions