Streaming MegaLab Camera with Community Fish Detector

This is a project I've wanted to do for some time. Ever since I learned about The MegaLab and the live stream camera they have, I've wanted to apply some computer vision models to it, or at the very least, just collect some data from it.

Well, now I have. Or rather, as of writing this, I am doing just that.

Recently, another organization called Wildhackers beat me to the punch (quite literally) by training a large object detection model on many open-source fish datasets from around the world, so it can generically detect fish in the wild from any source.

They call this model the Community Fish Detector and have made it open source for anyone to use. Of course, it utilizes the Ultralytics YOLO architecture, which makes it an AGPL-licensed model by default. But that's okay, because this is open-sourced work too!

In addition, cool enough, this stream from the MegaLab also has an audio source! And seeing as how I just got done writing a post about generating spectrograms with Gstreamer, I think it would be fun to grab the audio stream as well and do something fun with it! I don't know the quality of the hydrophone audio, but we will soon find out!

So this project will attempt to set up a live stream from the MegaLab camera using Gstreamer, run inference across the frames, and also decode the audio stream and generate a spectrogram video! Quite a feature-packed project! Let's gggoooooooo!!!

To start, we have to grab the live stream from the MegaLab camera. They currently broadcast their stream over Youtube for all to see. Youtube delivers it via HLS streaming, which Gstreamer can read from. But Youtube deliberately makes it a bit tricky to get the actual HLS playlist URL. After reviewing the terms of service, I believe reading it directly from a script is not allowed (but I'm just gonna do it anyway. shhhhh... I'm sure they won't mind for a bit).

The tool yt-dlp is a great way to extract the actual HLS playlist URL from a Youtube video. So we can use it to get the URL and then pass that into Gstreamer.

In [1]:
! yt-dlp --get-url "https://www.youtube.com/watch?v=UFA_SYoLqtk"
https://manifest.googlevideo.com/api/manifest/hls_playlist/expire/1763805988/ei/xDYhaf_oCreZsfIP97brwQ4/ip/98.151.109.154/id/UFA_SYoLqtk.1/itag/96/source/yt_live_broadcast/requiressl/yes/ratebypass/yes/live/1/sgoap/gir%3Dyes%3Bitag%3D140/sgovp/gir%3Dyes%3Bitag%3D137/rqh/1/hls_chunk_host/rr20---sn-ib.googlevideo.com/xpc/EgVo2aDSNQ%3D%3D/playlist_duration/30/manifest_duration/30/bui/AdEuB5RL-U6RHKUGmgYBynes5SExVgiANkTt05ot8hLNj-0F17RT6FjrnDblfhYNS8GsYj7ECMacVnLU/spc/6b0G_CFhsiM4f8J-0ZWuC5gaS7UEdIiD3HmjpsCfRbwAb_Jx0pDq_Ygdv0VNjOfFXRI/vprv/1/playlist_type/DVR/cps/77/initcwndbps/3011250/met/1763784388,/mh/mc/mm/44/mn/sn-ib/ms/lva/mv/m/mvi/20/pl/24/rms/lva,lva/dover/11/pacing/0/keepalive/yes/fexp/51355912,51552689,51565115,51565681,51580968/mt/1763784031/sparams/expire,ei,ip,id,itag,source,requiressl,ratebypass,live,sgoap,sgovp,rqh,xpc,playlist_duration,manifest_duration,bui,spc,vprv,playlist_type/sig/AJfQdSswRAIgKbwygFg1I5ErOu1JMkXMC1I86x71zh3oIv7p-N9XPyECIA6BbAEcT2QJN0E9P9OekGiWgoz-Ma3VOTnloy5al9TU/lsparams/hls_chunk_host,cps,initcwndbps,met,mh,mm,mn,ms,mv,mvi,pl,rms/lsig/APaTxxMwRQIgOYgELDX1LxO5gdWjPn_76hYeYrkBDzgDKK8ptQjk_tQCIQC3_OWVi1wqxUOI7GH3HCB-7WQ3bQJcNyppy8L_J-T3gQ%3D%3D/playlist/index.m3u8

Alright, easy enough! Then, utilizing gst-launch-1.0 and some bash command substitution, we can read from the HLS stream like so:

In [2]:
! gst-launch-1.0 souphttpsrc location="$(yt-dlp --get-url "https://www.youtube.com/watch?v=UFA_SYoLqtk")" is-live=1 ! decodebin ! videoconvert ! autovideosink
Setting pipeline to PAUSED ...
Pipeline is live and does not need PREROLL ...
Got context from element 'souphttpsrc0': gst.soup.session=context, session=(GstSoupSession)NULL;
Pipeline is PREROLLED ...
Setting pipeline to PLAYING ...
New clock: GstSystemClock
Got context from element 'souphttpsrc1': gst.soup.session=context, session=(GstSoupSession)NULL;
Got context from element 'souphttpsrc1': gst.soup.session=context, session=(GstSoupSession)NULL;
Redistribute latency...
Redistribute latency...
Redistribute latency...
Got context from element 'souphttpsrc1': gst.soup.session=context, session=(GstSoupSession)NULL;
Got context from element 'souphttpsrc1': gst.soup.session=context, session=(GstSoupSession)NULL;
Got context from element 'souphttpsrc1': gst.soup.session=context, session=(GstSoupSession)NULL;
handling interrupt.9.
Interrupt: Stopping pipeline ...
Execution ended after 0:00:07.538806387
Setting pipeline to NULL ...
Freeing pipeline ...
^C

You'll have to trust me that this works because I can't really show a video in this notebook. But it's confirmed: we've got the stream and we can see the fish swimming around! We can now begin writing our Gstreamer pipeline to do the inferencing.

I won't provide all the code here in the notebook, as it's quite lengthy, but I will showcase the important parts.

First, we set up a Gstreamer pipeline to read from the HLS source, decode both the audio and video streams, and send them to appsinks for us to read from in Python.

hls_source = [...retrieved using yt-dlp...]
fps = 30

source_pipeline = f'''
appsink name=videoread emit-signals=true \
appsink name=audioread emit-signals=true \
souphttpsrc location="{hls_source}" is-live=true \
! hlsdemux \
! tsdemux name=demux \
demux. \
    ! tee name=video_tee \
    video_tee. \
    ! queue \
    ! h264parse \
    ! avdec_h264 \
    ! videoconvert \
    ! videorate \
    ! video/x-raw,format=RGB,framerate={fps}/1 \
    ! videoread. \
demux. \
    ! tee name=audio_tee \
    audio_tee. \
    ! queue \
    ! aacparse \
    ! faad \
    ! audioconvert \
    ! audioresample \
    ! audiobuffersplit output-buffer-duration=1/{fps} \
    ! audioread.
'''.strip()

source_pipeline = Gst.parse_launch(source_pipeline)

video_source = source_pipeline.get_by_name("videoread")
audio_source = source_pipeline.get_by_name("audioread")

This Gstreamer pipeline should do the trick. We have two appsinks: one for the video frames and one for the audio buffers. We demux the HLS stream, then demux the MPEG-TS segments, and direct the elementary streams into separate threads (using tees and queues), decode them, convert them to a usable format, and send them to the appsinks. For the video, we convert to RGB format so it's easy to load into a numpy array later. For the audio, we split the buffers into chunks of 1/30th of a second so that our audio processing matches the frame rate of the video (30 FPS).
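
One thing this excerpt leaves out is the boilerplate it assumes: the PyGObject imports, Gst.init, and resolving hls_source. A minimal sketch of that setup (the exact details here are an assumption; the full code is linked at the end of the post):

import subprocess

import numpy as np
import gi

gi.require_version("Gst", "1.0")
from gi.repository import Gst, GLib

# GStreamer must be initialized once before any pipeline is parsed
Gst.init(None)

# resolve the HLS playlist URL the same way as the shell example earlier
hls_source = subprocess.check_output(
    ["yt-dlp", "--get-url", "https://www.youtube.com/watch?v=UFA_SYoLqtk"],
    text=True,
).strip()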

To reshape the video frames, it's as simple as:

def on_video_sample(sink):
    sample = sink.emit("pull-sample")
    buffer = sample.get_buffer()
    caps = sample.get_caps()
    success, map_info = buffer.map(Gst.MapFlags.READ)

    if not success:
        return Gst.FlowReturn.ERROR

    # numpy puts rows first: (height, width, channels) for a 1080p frame
    frame = np.frombuffer(map_info.data, dtype=np.uint8)
    frame = frame.reshape((1080, 1920, 3))

    # boom!

    buffer.unmap(map_info)
    return Gst.FlowReturn.OK

video_source.connect("new-sample", on_video_sample)
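
The stream happens to be 1080p, so hardcoding the shape works, but the caps that come with each sample carry the negotiated resolution too. A variant of the callback that reads it from the caps instead (just a sketch, not what the project does):

def on_video_sample(sink):
    sample = sink.emit("pull-sample")
    buffer = sample.get_buffer()

    # the negotiated caps carry the real resolution of the decoded frames
    structure = sample.get_caps().get_structure(0)
    width = structure.get_value("width")
    height = structure.get_value("height")

    success, map_info = buffer.map(Gst.MapFlags.READ)
    if not success:
        return Gst.FlowReturn.ERROR

    frame = np.frombuffer(map_info.data, dtype=np.uint8).reshape((height, width, 3))

    # ...inference goes here...

    buffer.unmap(map_info)
    return Gst.FlowReturn.OK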

For the audio buffers, it's a little more involved. In my previous post on spectrogram data, I actually didn't generate the spectrogram correctly. You see, the AAC audio codec can use something called Spectral Band Replication (SBR). With SBR, the top half of the frequency spectrum literally just gets cut, and only the bottom half is fully encoded. From a human listening perspective this is actually fine, because the higher frequencies don't add much to the perceived audio quality (but certainly do add to the file size). Instead, AAC passes side information that tells the decoder how to "reconstruct" the higher frequencies from the lower ones (essentially mirroring them, at lower quality). This creates a nice illusion of higher quality audio without actually having to encode the higher frequencies.

What does this mean for spectrograms? It means our spectrograms are gonna look weird if we don't handle this properly. Like this:

In [1]:
import cv2
import matplotlib.pyplot as plt

image = cv2.imread("debug_spectrogram.png")
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

plt.figure(figsize=(12, 6))
plt.imshow(image)
plt.axis("off")
plt.show()
(image: the debug spectrogram, with the oddly mirrored upper half visible)

We can safely just cut the upper half of the spectrogram off, since we know it's just a mirrored version of the lower half. So in our audio processing code, we do just that:

def on_audio_sample(sink):
    global spect_image

    sample = sink.emit("pull-sample")
    buffer = sample.get_buffer()
    success, map_info = buffer.map(Gst.MapFlags.READ)

    if not success:
        return Gst.FlowReturn.ERROR

    data = map_info.data
    
    # the caps say S16LE, so we interpret the buffer as int16
    tensor = np.frombuffer(data, dtype=np.int16).copy()

    # this is two channel audio, reshape accordingly
    tensor = tensor.reshape(-1, 2).T

    # use only the first channel
    tensor = tensor[0, :]
    
    # generate spectrogram
    tensor = power_to_db(spec_transform(t.from_numpy(tensor).float()))

    # flip the spectrogram vertically so the high frequencies are at the top
    tensor = np.flipud(tensor.numpy())

    # scroll the rolling spectrogram image left and append the new columns
    # on the right. this is faster than recomputing the entire image each time
    spect_image[:, :-tensor.shape[1]] = spect_image[:, tensor.shape[1]:]
    spect_image[:, -tensor.shape[1]:] = tensor

    # normalize spect_image to 0-255
    norm_image = spect_image - spect_image.min()
    norm_image = norm_image / (norm_image.max() + 1e-6)
    norm_image = norm_image * 255.0
    norm_image = norm_image.astype(np.uint8)
    
    # apply color map. Inferno looks nice!
    norm_image = cv2.applyColorMap(norm_image, cv2.COLORMAP_INFERNO)
    norm_image = cv2.cvtColor(norm_image, cv2.COLOR_BGR2RGB)

    # cut off the upper half. Since the AAC codec applies
    # Spectral Band Replication (SBR), the higher frequencies
    # are just mirrored copies of the lower frequencies.
    norm_image = norm_image[spect_image_height//2:, :]

    # resize to a standardized size. Some slight distortion is ok.
    norm_image = cv2.resize(norm_image, (spect_video_width, spect_video_height), interpolation=cv2.INTER_AREA)

    video_buffer = Gst.Buffer.new_wrapped(norm_image.tobytes())
    video_buffer.pts = buffer.pts
    video_buffer.duration = buffer.duration

    audio_buffer = Gst.Buffer.new_wrapped(data)
    audio_buffer.pts = buffer.pts
    audio_buffer.duration = buffer.duration

    audio_write.emit("push-buffer", audio_buffer)
    spect_write.emit("push-buffer", video_buffer)

    buffer.unmap(map_info)

    return Gst.FlowReturn.OK
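
A few names in that callback (spec_transform, power_to_db, spect_image, and the t alias) aren't defined in this excerpt. A minimal sketch of what they could look like, assuming torchaudio, with sizes picked purely for illustration:

import numpy as np
import torch as t
import torchaudio

# illustrative assumptions, not the project's actual values
n_fft = 512
spect_image_height = n_fft // 2 + 1   # frequency bins per STFT column
spect_image_width = 1024              # columns of history kept in the rolling image

# power spectrogram over the mono samples
spec_transform = torchaudio.transforms.Spectrogram(n_fft=n_fft, power=2.0)

# convert the power spectrogram to decibels
power_to_db = torchaudio.transforms.AmplitudeToDB(stype="power")

# rolling image that the callback scrolls new columns into
spect_image = np.zeros((spect_image_height, spect_image_width), dtype=np.float32)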

Okay, so that's the full audio processing code; at the end we push buffers to the audio_write and spect_write appsrcs. We will loop back to those. But first, we still need to run inference on the video frames to detect fish!

The Ultralytics framework is much simpler to set up than this lengthy audio processing code. We just load the model and then call it on the frames we get from Gstreamer.

from ultralytics import YOLO

model = YOLO("cfd-yolov12x-1.00.pt", verbose=False, task="detect")

#
#
# in the video sample callback
#
#

frame = np.frombuffer(map_info.data, dtype=np.uint8)
frame = frame.reshape((video_height, video_width, 3))

results = model(frame, imgsz=640, verbose=False)
image = results[0].plot(color_mode="class")
image = cv2.resize(image, (video_width, video_height))

frame_buffer = Gst.Buffer.new_wrapped(image.tobytes())
frame_buffer.pts = buffer.pts
frame_buffer.duration = buffer.duration

video_write.emit("push-buffer", frame_buffer)

So simple! Ultralytics will already detect the GPU if available, and process the frames quickly.

Now we have to set up the output Gstreamer pipeline to record this data to a file. It goes a little something like this:

sink_pipeline = f'''
appsrc name=videowrite emit-signals=true format=time is-live=true \
appsrc name=audiowrite emit-signals=true format=time is-live=true \
appsrc name=spectwrite emit-signals=true format=time is-live=true \
matroskamux name=mux \
    ! filesink location="output.mkv" \
compositor name=mix background=black sink_1::xpos=20 sink_1::ypos={video_height - spect_video_height - 20} sink_1::alpha=0.9 \
    ! video/x-raw,width={video_width},height={video_height} \
    ! videoconvert \
    ! videorate \
    ! video/x-raw,framerate={fps}/1,format=I420 \
    ! x264enc tune=zerolatency speed-preset=ultrafast \
    ! h264parse config-interval=-1 \
    ! mux. \
videowrite. \
    ! queue \
    ! videoconvert \
    ! mix.sink_0 \
spectwrite. \
    ! queue \
    ! videoconvert \
    ! mix.sink_1 \
audiowrite. \
    ! queue \
    ! audioconvert \
    ! flacenc \
    ! mux.
'''
sink_pipeline = Gst.parse_launch(sink_pipeline)

video_write = sink_pipeline.get_by_name("videowrite")
audio_write = sink_pipeline.get_by_name("audiowrite")
spect_write = sink_pipeline.get_by_name("spectwrite")

video_caps = Gst.Caps.from_string(f"video/x-raw,format=RGB,width={video_width},height={video_height},framerate={fps}/1")
audio_caps = Gst.Caps.from_string("audio/x-raw,format=S16LE,layout=interleaved,rate=44100,channels=2")
spect_caps = Gst.Caps.from_string(f"video/x-raw,format=RGB,width={spect_video_width},height={spect_video_height},framerate={fps}/1")

video_write.set_property('caps', video_caps)
audio_write.set_property('caps', audio_caps)
spect_write.set_property('caps', spect_caps)

The way this all works is that we have three appsrc elements: one for the video with bounding boxes drawn on it, one for the audio stream, and one for the spectrogram video. A compositor element overlays the spectrogram video on top of the main video, which then gets encoded to H.264. At the same time, we encode the audio to FLAC, and then mux both into a Matroska container file!

It's very important to set the caps on the appsrc elements. Since these are two separate pipelines (the source and the sink), they cannot negotiate caps automatically, so we have to set them manually.

Finally, we just start both pipelines and let them run! I was originally going to re-stream this out to an RTSP server like MediaMTX, but I figured that was a bit much for this demo. Just the fact that we can perform this magic on the video stream is enough for me!
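
For reference, kicking everything off is roughly this. It's a sketch that assumes the callbacks above are already connected; the real code handles shutdown a bit more carefully:

sink_pipeline.set_state(Gst.State.PLAYING)
source_pipeline.set_state(Gst.State.PLAYING)

loop = GLib.MainLoop()
try:
    loop.run()
except KeyboardInterrupt:
    pass

# signal end-of-stream on the appsrcs so the muxer can finalize output.mkv
video_write.emit("end-of-stream")
audio_write.emit("end-of-stream")
spect_write.emit("end-of-stream")

# give the EOS a moment to reach the filesink before tearing down
bus = sink_pipeline.get_bus()
bus.timed_pop_filtered(5 * Gst.SECOND, Gst.MessageType.EOS | Gst.MessageType.ERROR)

source_pipeline.set_state(Gst.State.NULL)
sink_pipeline.set_state(Gst.State.NULL)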

But if you let your imagination run wild, streaming this out to an RTSP server is very doable. You could even set this up to stream right back into a Youtube live stream if you wanted to! You could also split the spectrogram and audio into a second stream and do machine learning on that data too.
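
As a rough idea of what that could look like, the matroskamux and filesink tail could be swapped for rtspclientsink pointed at a local MediaMTX instance. This is just an untested sketch of the shape of it, with the appsrc fed exactly like videowrite above:

restream_pipeline = '''
appsrc name=videowrite emit-signals=true format=time is-live=true \
    ! queue \
    ! videoconvert \
    ! video/x-raw,format=I420 \
    ! x264enc tune=zerolatency speed-preset=ultrafast \
    ! h264parse config-interval=-1 \
    ! rtspclientsink location=rtsp://localhost:8554/megalab
'''.strip()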

When reviewing the audio and spectrograms, there is plenty of noise out there on the reef. One notable sound is the crunch of fish as they scrape food off the coral. It's quite loud, distinct, and visible in the spectrogram. As for the video, the Community Fish Detector model works pretty well right off the bat. It mainly detects the larger fish in view, and with tracking applied, we could follow their movements over time.
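
Going down that road would mostly be a one-line change, since Ultralytics ships a built-in tracking mode. A sketch of what the inference call would become (not something this project does):

# hypothetical: track() assigns persistent IDs to detections across frames
results = model.track(frame, imgsz=640, persist=True, verbose=False)
image = results[0].plot(color_mode="class")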

In [3]:
from IPython.display import Video

Video('https://jackmead515.github.io/videos/megalab_detected.mp4', html_attributes='loop autoplay muted playsinline width="100%"')
Out[3]:
(embedded video: the composited output with fish detections and the spectrogram overlay)

If you want to hear the audio, you'll have to view the video file directly. In addition, the full code for this is available here: megalab detector stream.

This was such a fun project. You know, as an engineer, when you get that feeling that you just want to stay indoors and code as fast as you can on a new idea? That's exactly how I felt working on this: I took a Friday night and a Saturday morning and just cranked it out.

I just felt so inspired and fascinated by all the possibilities of what could be done with this data, and how relatively easy it is to do with the tools we have today.

I can't wait to test this out on other streams and see what else we can discover!