Skip to content

Commit

Permalink
Merge pull request #426 from adrums86/emsg-support
Browse files Browse the repository at this point in the history
feat: support emsg box ID3 parsing
  • Loading branch information
adrums86 committed Feb 14, 2023
2 parents 2e50704 + e786dc1 commit a8146c7
Show file tree
Hide file tree
Showing 9 changed files with 630 additions and 173 deletions.
179 changes: 9 additions & 170 deletions lib/m2ts/metadata-stream.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,168 +12,7 @@
var
Stream = require('../utils/stream'),
StreamTypes = require('./stream-types'),
typedArrayIndexOf = require('../utils/typed-array').typedArrayIndexOf,
// Frames that allow different types of text encoding contain a text
// encoding description byte [ID3v2.4.0 section 4.]
// This table names the possible values of that byte. Several of the frame
// parsers in this file (APIC, T*, TXXX, WXXX) compare the first byte of the
// frame data against the Utf8 entry and ignore the frame on a mismatch.
textEncodingDescriptionByte = {
Iso88591: 0x00, // ISO-8859-1, terminated with \0.
Utf16: 0x01, // UTF-16 encoded Unicode BOM, terminated with \0\0
Utf16be: 0x02, // UTF-16BE encoded Unicode, without BOM, terminated with \0\0
Utf8: 0x03 // UTF-8 encoded Unicode, terminated with \0
},
// return a percent-encoded representation of the specified byte range
// @see http://en.wikipedia.org/wiki/Percent-encoding
percentEncode = function(bytes, start, end) {
var i, result = '';
for (i = start; i < end; i++) {
result += '%' + ('00' + bytes[i].toString(16)).slice(-2);
}
return result;
},
// return the string representation of the specified byte range,
// interpreted as UTf-8.
parseUtf8 = function(bytes, start, end) {
return decodeURIComponent(percentEncode(bytes, start, end));
},
// return the string representation of the specified byte range,
// interpreted as ISO-8859-1.
parseIso88591 = function(bytes, start, end) {
return unescape(percentEncode(bytes, start, end)); // jshint ignore:line
},
parseSyncSafeInteger = function(data) {
return (data[0] << 21) |
(data[1] << 14) |
(data[2] << 7) |
(data[3]);
},
frameParsers = {
'APIC': function(frame) {
var
i = 1,
mimeTypeEndIndex,
descriptionEndIndex,
LINK_MIME_TYPE = '-->';

if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}

// parsing fields [ID3v2.4.0 section 4.14.]
mimeTypeEndIndex = typedArrayIndexOf(frame.data, 0, i);
if (mimeTypeEndIndex < 0) {
// malformed frame
return;
}

// parsing Mime type field (terminated with \0)
frame.mimeType = parseIso88591(frame.data, i, mimeTypeEndIndex);
i = mimeTypeEndIndex + 1;

// parsing 1-byte Picture Type field
frame.pictureType = frame.data[i];
i++

descriptionEndIndex = typedArrayIndexOf(frame.data, 0, i);
if (descriptionEndIndex < 0) {
// malformed frame
return;
}

// parsing Description field (terminated with \0)
frame.description = parseUtf8(frame.data, i, descriptionEndIndex);
i = descriptionEndIndex + 1;

if (frame.mimeType === LINK_MIME_TYPE) {
// parsing Picture Data field as URL (always represented as ISO-8859-1 [ID3v2.4.0 section 4.])
frame.url = parseIso88591(frame.data, i, frame.data.length)
} else {
// parsing Picture Data field as binary data
frame.pictureData = frame.data.subarray(i, frame.data.length);
}
},
'T*': function(frame) {
if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}

// parse text field, do not include null terminator in the frame value
// frames that allow different types of encoding contain terminated text [ID3v2.4.0 section 4.]
frame.value = parseUtf8(frame.data, 1, frame.data.length).replace(/\0*$/, '');
// text information frames supports multiple strings, stored as a terminator separated list [ID3v2.4.0 section 4.2.]
frame.values = frame.value.split('\0');
},
'TXXX': function(frame) {
var descriptionEndIndex;

if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}

descriptionEndIndex = typedArrayIndexOf(frame.data, 0, 1);

if (descriptionEndIndex === -1) {
return;
}

// parse the text fields
frame.description = parseUtf8(frame.data, 1, descriptionEndIndex);
// do not include the null terminator in the tag value
// frames that allow different types of encoding contain terminated text
// [ID3v2.4.0 section 4.]
frame.value = parseUtf8(
frame.data,
descriptionEndIndex + 1,
frame.data.length
).replace(/\0*$/, '');
frame.data = frame.value;
},
'W*': function(frame) {
// parse URL field; URL fields are always represented as ISO-8859-1 [ID3v2.4.0 section 4.]
// if the value is followed by a string termination all the following information should be ignored [ID3v2.4.0 section 4.3]
frame.url = parseIso88591(frame.data, 0, frame.data.length).replace(/\0.*$/, '');
},
'WXXX': function(frame) {
var descriptionEndIndex;

if (frame.data[0] !== textEncodingDescriptionByte.Utf8) {
// ignore frames with unrecognized character encodings
return;
}

descriptionEndIndex = typedArrayIndexOf(frame.data, 0, 1);

if (descriptionEndIndex === -1) {
return;
}

// parse the description and URL fields
frame.description = parseUtf8(frame.data, 1, descriptionEndIndex);
// URL fields are always represented as ISO-8859-1 [ID3v2.4.0 section 4.]
// if the value is followed by a string termination all the following information
// should be ignored [ID3v2.4.0 section 4.3]
frame.url = parseIso88591(
frame.data,
descriptionEndIndex + 1,
frame.data.length
).replace(/\0.*$/, '');
},
'PRIV': function(frame) {
var i;

for (i = 0; i < frame.data.length; i++) {
if (frame.data[i] === 0) {
// parse the description and URL fields
frame.owner = parseIso88591(frame.data, 0, i);
break;
}
}
frame.privateData = frame.data.subarray(i + 1);
frame.data = frame.privateData;
}
},
id3 = require('../tools/parse-id3'),
MetadataStream;

MetadataStream = function(options) {
Expand Down Expand Up @@ -241,7 +80,7 @@ MetadataStream = function(options) {
// last four bytes of the ID3 header.
// The most significant bit of each byte is dropped and the
// results concatenated to recover the actual value.
tagSize = parseSyncSafeInteger(chunk.data.subarray(6, 10));
tagSize = id3.parseSyncSafeInteger(chunk.data.subarray(6, 10));

// ID3 reports the tag size excluding the header but it's more
// convenient for our comparisons to include it
Expand Down Expand Up @@ -272,17 +111,17 @@ MetadataStream = function(options) {
if (tag.data[5] & 0x40) {
// advance the frame start past the extended header
frameStart += 4; // header size field
frameStart += parseSyncSafeInteger(tag.data.subarray(10, 14));
frameStart += id3.parseSyncSafeInteger(tag.data.subarray(10, 14));

// clip any padding off the end
tagSize -= parseSyncSafeInteger(tag.data.subarray(16, 20));
tagSize -= id3.parseSyncSafeInteger(tag.data.subarray(16, 20));
}

// parse one or more ID3 frames
// http://id3.org/id3v2.3.0#ID3v2_frame_overview
do {
// determine the number of bytes in this frame
frameSize = parseSyncSafeInteger(tag.data.subarray(frameStart + 4, frameStart + 8));
frameSize = id3.parseSyncSafeInteger(tag.data.subarray(frameStart + 4, frameStart + 8));
if (frameSize < 1) {
this.trigger('log', {
level: 'warn',
Expand All @@ -305,15 +144,15 @@ MetadataStream = function(options) {
frame.key = frame.id;

// parse frame values
if (frameParsers[frame.id]) {
if (id3.frameParsers[frame.id]) {
// use frame specific parser
frameParsers[frame.id](frame);
id3.frameParsers[frame.id](frame);
} else if (frame.id[0] === 'T') {
// use text frame generic parser
frameParsers['T*'](frame);
id3.frameParsers['T*'](frame);
} else if (frame.id[0] === 'W') {
// use URL link frame generic parser
frameParsers['W*'](frame);
id3.frameParsers['W*'](frame);
}

// handle the special PRIV frame used to indicate the start
Expand Down
104 changes: 104 additions & 0 deletions lib/mp4/emsg.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
var uint8ToCString = require('../utils/string.js').uint8ToCString;

/**
 * Based on: ISO/IEC 23009 Section: 5.10.3.3
 * References:
 * https://dashif-documents.azurewebsites.net/Events/master/event.html#emsg-format
 * https://aomediacodec.github.io/id3-emsg/
 *
 * Takes emsg box data as a uint8 array and returns a emsg box object.
 * Byte 0 of the data is the box version; parsing starts after the 4 bytes of
 * version + flags.
 *
 * @param {Uint8Array} boxData data from emsg box. May be a view (subarray)
 *        into a larger buffer.
 * @returns {Object|undefined} A parsed emsg box object with scheme_id_uri,
 *          value, timescale, presentation_time, presentation_time_delta,
 *          event_duration, id and message_data, or undefined when
 *          isValidEmsgBox rejects the result.
 */
var parseEmsgBox = function(boxData) {
  // version + flags
  var offset = 4;
  var version = boxData[0];
  // Anchor the view to boxData's own window. boxData.buffer is the whole
  // underlying ArrayBuffer, so without byteOffset every read would be taken
  // from the wrong position whenever boxData is a subarray.
  var dv = new DataView(boxData.buffer, boxData.byteOffset, boxData.byteLength);
  var scheme_id_uri,
    value,
    timescale,
    presentation_time,
    presentation_time_delta,
    event_duration,
    id,
    message_data;

  if (version === 0) {
    // version 0: the two strings come first, then four 32-bit fields
    scheme_id_uri = uint8ToCString(boxData.subarray(offset));
    // NOTE(review): advancing by the string length assumes uint8ToCString
    // returns one character per byte and includes the terminating '\0' --
    // confirm against ../utils/string.js
    offset += scheme_id_uri.length;
    value = uint8ToCString(boxData.subarray(offset));
    offset += value.length;
    timescale = dv.getUint32(offset);
    offset += 4;
    presentation_time_delta = dv.getUint32(offset);
    offset += 4;
    event_duration = dv.getUint32(offset);
    offset += 4;
    id = dv.getUint32(offset);
    offset += 4;
  } else if (version === 1) {
    // version 1: fixed-width fields first (with a 64-bit presentation
    // time), then the two strings
    timescale = dv.getUint32(offset);
    offset += 4;
    // converted to a Number, so values above Number.MAX_SAFE_INTEGER lose
    // precision
    presentation_time = Number(dv.getBigUint64(offset));
    offset += 8;
    event_duration = dv.getUint32(offset);
    offset += 4;
    id = dv.getUint32(offset);
    offset += 4;
    scheme_id_uri = uint8ToCString(boxData.subarray(offset));
    offset += scheme_id_uri.length;
    value = uint8ToCString(boxData.subarray(offset));
    offset += value.length;
  }

  // copy the remaining bytes so the result does not alias boxData's buffer
  message_data = new Uint8Array(boxData.subarray(offset, boxData.byteLength));
  var emsgBox = {
    scheme_id_uri: scheme_id_uri,
    value: value,
    // if timescale is undefined or 0 set to 1
    timescale: timescale ? timescale : 1,
    presentation_time: presentation_time,
    presentation_time_delta: presentation_time_delta,
    event_duration: event_duration,
    id: id,
    message_data: message_data
  };

  return isValidEmsgBox(version, emsgBox) ? emsgBox : undefined;
};

/**
 * Converts an emsg time into timescale-scaled units. When a presentation
 * time is supplied (0 counts as supplied) it is divided by the timescale;
 * otherwise the time delta is divided by the timescale and added to offset.
 * @param {number} presentationTime
 * @param {number} timescale
 * @param {number} timeDelta
 * @param {number} offset
 * @returns the scaled time as a number
 */
var scaleTime = function(presentationTime, timescale, timeDelta, offset) {
  var hasPresentationTime = presentationTime || presentationTime === 0;

  if (hasPresentationTime) {
    return presentationTime / timescale;
  }

  return offset + timeDelta / timescale;
};

/**
 * Decides whether parsed emsg data is acceptable for its version.
 * A version 0 box needs a defined presentation_time_delta, a version 1 box
 * needs a defined presentation_time, and both need a scheme_id_uri that is
 * more than a lone '\0'. Any other version is rejected.
 * @param {number} version of the emsg box to validate
 * @param {Object} emsg the emsg data to validate
 * @returns if the box is valid as a boolean
 */
var isValidEmsgBox = function(version, emsg) {
  var schemePresent = emsg.scheme_id_uri !== '\0';

  if (version === 0) {
    return isDefined(emsg.presentation_time_delta) && schemePresent;
  }

  if (version === 1) {
    return isDefined(emsg.presentation_time) && schemePresent;
  }

  // Only valid versions of emsg are 0 and 1
  return false;
};

// Utility function to check if a value is defined, i.e. neither undefined
// nor null. Both comparisons must hold: joining them with || would be true
// for every input, because no value equals undefined and null at once.
var isDefined = function(data) {
  return data !== undefined && data !== null;
};

module.exports = {
parseEmsgBox: parseEmsgBox,
scaleTime: scaleTime
};
30 changes: 28 additions & 2 deletions lib/mp4/probe.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,15 @@ var toUnsigned = require('../utils/bin').toUnsigned;
var toHexString = require('../utils/bin').toHexString;
var findBox = require('../mp4/find-box.js');
var parseType = require('../mp4/parse-type.js');
var emsg = require('../mp4/emsg.js');
var parseTfhd = require('../tools/parse-tfhd.js');
var parseTrun = require('../tools/parse-trun.js');
var parseTfdt = require('../tools/parse-tfdt.js');
var getUint64 = require('../utils/numbers.js').getUint64;
var timescale, startTime, compositionStartTime, getVideoTrackIds, getTracks,
getTimescaleFromMediaHeader;
getTimescaleFromMediaHeader, getEmsgID3;
var window = require('global/window');
var parseId3Frames = require('../tools/parse-id3.js').parseId3Frames;


/**
Expand Down Expand Up @@ -370,6 +372,29 @@ getTracks = function(init) {
return tracks;
};

/**
 * Returns an array of emsg ID3 data from the provided segmentData.
 * An offset can also be provided as the Latest Arrival Time to calculate
 * the Event Start Time of v0 EMSG boxes.
 * See: https://dashif-documents.azurewebsites.net/Events/master/event.html#Inband-event-timing
 *
 * emsg boxes that parseEmsgBox rejects are left out of the result.
 *
 * @param {Uint8Array} segmentData the segment byte array.
 * @param {number} offset the segment start time or Latest Arrival Time,
 * @return {Object[]} an array of ID3 parsed from EMSG boxes; each entry has
 *         cueTime and duration (both scaled by the box timescale) and frames
 */
getEmsgID3 = function(segmentData, offset = 0) {
  var emsgBoxes = findBox(segmentData, ['emsg']);
  var cues = [];

  emsgBoxes.forEach(function(data) {
    var parsedBox = emsg.parseEmsgBox(new Uint8Array(data));

    // parseEmsgBox returns undefined for a box that fails validation; skip
    // it instead of dereferencing undefined below
    if (!parsedBox) {
      return;
    }

    cues.push({
      cueTime: emsg.scaleTime(parsedBox.presentation_time, parsedBox.timescale, parsedBox.presentation_time_delta, offset),
      duration: emsg.scaleTime(parsedBox.event_duration, parsedBox.timescale),
      frames: parseId3Frames(parsedBox.message_data)
    });
  });

  return cues;
};

module.exports = {
// export mp4 inspector's findBox and parseType for backwards compatibility
findBox: findBox,
Expand All @@ -379,5 +404,6 @@ module.exports = {
compositionStartTime: compositionStartTime,
videoTrackIds: getVideoTrackIds,
tracks: getTracks,
getTimescaleFromMediaHeader: getTimescaleFromMediaHeader
getTimescaleFromMediaHeader: getTimescaleFromMediaHeader,
getEmsgID3: getEmsgID3,
};
Loading

0 comments on commit a8146c7

Please sign in to comment.