Dynamic in-browser video stitching

seamless.js No brainer, lean on MediaSource for on demand DASH clip concat client-side

I did some work prototyping a personalised video app recently. Its main feature was giving users the ability to create their own sequence on the spot out of an open-ended list of prepared clips. I chose Firebase for all the out-of-the-box authentication and upload functionality.

Firebase also integrates nicely with cloud functions. Those came in handy for encoding clip uploads. Then I tried calling on FFmpeg for concatenating the clips; ffmpeg-static provides GCP installable binaries to that end. It was always going to be memory consuming. It ran painfully slow as well.

// Non-starter, sample FFmpeg-based concatenation server-side
const ffmpeg = require('fluent-ffmpeg')
const { path: ffmpegPath } = require('ffmpeg-static')
const os = require('os')

// OS temp directory, passed to `mergeToFile` as its second argument
const tmpdir = os.tmpdir()

/**
 * Concatenate media inputs server-side: every entry of `list` is added, in
 * order, to a single FFmpeg command which is then merged into `target`.
 *
 * @param {Array} [list=[]] Inputs handed to `addInput` one by one, presumably file paths.
 * @param {string} [target='/my/output/path'] Output location given to `mergeToFile`.
 * @returns {Promise} Resolves on the command's `end` event, rejects on `error`.
 */
function concat(list = [], target = '/my/output/path') {
  return new Promise((resolve, reject) => {
    // Start from a bare command pointed at the bundled binary
    let command = ffmpeg().setFfmpegPath(ffmpegPath)

    // Queue up inputs L2R
    list.forEach((input) => {
      command = command.addInput(input)
    })

    const job = command.mergeToFile(target, tmpdir)

    job.on('end', resolve).on('error', reject)
  })
}

Poor speed and having to store an ever-growing archive of every combination rendered sent me looking elsewhere. Luckily, most browsers, even Safari on iPadOS 13, support the Media Source Extensions API, or MSE. MSE allows for joining together the array buffers representing various clips into a single virtual video source. Save for iOS, that means the stitching may be reliably taken care of on the front end. Based on the simpl.info demo, for example,

// How neat, in-browser MSE-based concatenation
/**
 * Stitch a list of clips into a single virtual video source using Media
 * Source Extensions and hand back an object URL for it.
 *
 * The list is copied up front, so the caller's array is left untouched. An
 * empty list closes the stream right away instead of appending `undefined`.
 *
 * @param {Object} [options]
 * @param {ArrayBuffer[]} [options.buffers] Playlist assets in `ArrayBuffer` format, appended left to right.
 * @param {string} [options.encoding] MIME type and codecs passed to `addSourceBuffer`.
 * @returns {string} Object URL created from the `MediaSource`.
 */
function render({
  // Playlist assets in `ArrayBuffer` format
  buffers = [new ArrayBuffer()],
  // A bit of a pain the codec part, another option would be 'video/webm; codecs="vorbis, vp8"'
  encoding = 'video/mp4; codecs="avc1.42E01E, mp4a.40.2"'
} = {}) {
  // Work off a private copy, draining the caller's array would be a surprising side effect
  const queue = Array.from(buffers)
  const mediaSource = new MediaSource()
  const src = window.URL.createObjectURL(mediaSource)

  mediaSource.addEventListener('sourceopen', () => {
    const sourceBuffer = mediaSource.addSourceBuffer(encoding)

    // Experimental,
    // https://developer.mozilla.org/en-US/docs/Web/API/SourceBuffer/mode
    if (sourceBuffer.mode === 'segments') {
      sourceBuffer.mode = 'sequence'
    }

    // Append the next clip if one is left, close the stream when done
    const advance = () => {
      if (queue.length) {
        sourceBuffer.appendBuffer(queue.shift())
      } else if (mediaSource.readyState === 'open' && sourceBuffer.updating === false) {
        mediaSource.endOfStream()
      }
    }

    // Keep adding buffers L2R, one per completed update
    sourceBuffer.addEventListener('updateend', advance)
    // Init
    advance()

    window.URL.revokeObjectURL(src)
  }, { once: true })

  return src
}

Who knows if iOS will ever implement MSE and DASH. For now, it's quite possible to dynamically generate an HLS-compatible fallback M3U8 playlist, provided it is specified as a blob URL in a child <source> element instead of by setting src on the host video tag.

/**
 * Build an HLS-compatible M3U8 playlist out of the given clips and wrap it
 * in a `Blob`.
 *
 * Each entry is resolved against the current document URL rather than glued
 * onto it, so relative paths, root-relative paths and absolute URLs all come
 * out valid even when the page URL carries a file name, query string or hash.
 *
 * @param {...string} list Clip paths or URLs, in playback order.
 * @returns {Blob} Playlist text of type `application/x-mpegURL`.
 */
function createPlaylist(...list) {
  const base = document.location.href
  const lines = [
    // Head
    '#EXTM3U',
    '#EXT-X-VERSION:3',
    '#EXT-X-MEDIA-SEQUENCE:0',
    // Duration set when encoding
    '#EXT-X-TARGETDURATION:2'
  ]

  for (const item of list) {
    // Add duration for each clip, the comma after the duration is required by the format
    lines.push('#EXTINF:1,')
    // Get full path
    lines.push(new URL(item, base).href)
  }

  // Foot
  lines.push('#EXT-X-ENDLIST')

  return new Blob([lines.join('\n')], { type: 'application/x-mpegURL' })
}

While none of this is breakthrough, I thought extending the built-in video element to automatically handle MSE playback would be interesting on the way to maybe developing an embeddable generative audiovisual widget. But how would one declaratively specify constituent clips? Using a hollow custom element with a single src attribute is the best I've been able to come up with so far,

<!-- client.html -->
<!-- `playsinline` (not `playinline`) is the attribute that asks for in-place rather than fullscreen playback -->
<video controls playsinline is="ultra-seamless" height="360" width="480">
  <!-- One hollow child per clip, its `src` is what gets collected for stitching -->
  <!-- NOTE(review): `alt` presumably names the HLS fallback segment, confirm -->
  <ultra-seamless-clip src="fragment.mp4" alt="segment.ts"></ultra-seamless-clip>
</video>
// module.js
/**
 * Hollow element standing in for a single clip; all it does is mirror a
 * `src` property onto the attribute of the same name.
 */
class SeamlessClip extends HTMLElement {
  /** Whatever `getAttribute` reports for `src`. */
  get src() {
    return this.getAttribute('src')
  }

  /** Writes truthy values through to the `src` attribute, falsy ones are ignored. */
  set src(value) {
    if (!value) {
      return
    }

    this.setAttribute('src', value)
  }
}

/**
 * Video element extension that, once connected, gathers the `src` of every
 * `ultra-seamless-clip` descendant and passes the list on to `render`.
 */
class Seamless extends HTMLVideoElement {
  // ...
  connectedCallback() {
    // Only clips carrying a `src` attribute count as tracks
    const clips = this.querySelectorAll('ultra-seamless-clip[src]')
    const assets = Array.from(clips, clip => clip.getAttribute('src'))

    this.render(assets)
  }

  /**
   * Placeholder in this excerpt, the body is left out.
   *
   * @param {string[]} assets Clip urls collected in `connectedCallback`.
   */
  render(assets) {
    // Fetch and array-buffer-encode each asset before stitching
  }
}

// Register the hollow clip element under its own tag name
customElements.define('ultra-seamless-clip', SeamlessClip)
// Register the player as an extension of the built-in video tag, used as `<video is="ultra-seamless">`
customElements.define('ultra-seamless', Seamless, { extends: 'video' })

Module home, thewhodidthis/seamless →

Reference