refactor(utils/lyrics): support multi-timestamp lyrics (#1268)

* refactor(utils/lyrics): support multi-timestamp lyrics

Fully rewritten, with improved performance.

* refactor(utils/lyrics): remove useless .map

* fix(utils/lyrics): enhance the extraction logic
This commit is contained in:
pan93412 2022-01-27 16:25:45 +08:00 committed by GitHub
parent 7b7b8745b6
commit 8a6c13e62f
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -7,38 +7,76 @@ export function lyricParser(lrc) {
}; };
} }
// Matches a string that starts with exactly one `[min:sec.ms]` (or `[min:sec:ms]`)
// timestamp, followed by optional whitespace and non-empty content.
// Named groups: `rawTime` (the bracketed timestamp), `min`, `sec`, `ms`, `content`.
// No flags are set, so `^` and `$` anchor to the whole input string.
const extractTimeRegex = /^(?<rawTime>\[(?<min>\d+):(?<sec>\d+)(?:\.|:)(?<ms>\d+)\])\s*(?<content>.+)$/; // regexr.com/6e52n
// Matches, per line (`g` + `m` flags), one or more consecutive bracketed tags
// followed by non-empty content that does not itself start with `[`.
// Named groups: `lyricTimestamps` (all leading `[...]` tags as one string), `content`.
// NOTE: `\[.+?\]` accepts any bracketed text here, not only timestamps; the
// individual timestamps are picked out afterwards with `extractTimestampRegex`.
const extractLrcRegex = /^(?<lyricTimestamps>(?:\[.+?\])+)(?!\[)(?<content>.+)$/gm;
// Matches every `[min:sec.ms]` / `[min:sec:ms]` timestamp in a string (`g` flag).
// Named groups: `min`, `sec`, `ms` (all digit strings).
const extractTimestampRegex = /\[(?<min>\d+):(?<sec>\d+)(?:\.|:)(?<ms>\d+)\]/g;
/**
 * @typedef {{time: number, rawTime: string, content: string}} ParsedLyric
 */

/**
 * Parse the lyric string.
 *
 * Each line matched by `extractLrcRegex` may carry several leading timestamps
 * (e.g. `[00:01.00][00:05.00] text`); one entry is produced per timestamp, all
 * sharing the same content. Lines that do not match are ignored.
 *
 * @param {string} lrc The `lrc` input.
 * @returns {ParsedLyric[]} The parsed lyrics, sorted by ascending `time`
 *   (in seconds). Entries with equal `time` keep their order of appearance.
 * @example parseLyric("[00:00.00] Hello, World!\n[00:00.10] Test\n");
 */
function parseLyric(lrc) {
  /** @type {ParsedLyric[]} */
  const parsedLyrics = [];

  for (const line of lrc.trim().matchAll(extractLrcRegex)) {
    const { lyricTimestamps, content } = line.groups;
    // The content is identical for every timestamp on this line, so trim it once.
    const trimmedContent = trimContent(content);

    for (const timestamp of lyricTimestamps.matchAll(extractTimestampRegex)) {
      const { min, sec, ms } = timestamp.groups;
      const rawTime = timestamp[0];
      // The digits after the separator are a decimal fraction of a second, so
      // their scale depends on how many digits there are: `.5`, `.50` and
      // `.500` all mean half a second. A fixed multiplier would misread any
      // width other than the one it was written for.
      const time = Number(min) * 60 + Number(sec) + Number(`0.${ms}`);

      parsedLyrics.push({ rawTime, time, content: trimmedContent });
    }
  }

  // Array.prototype.sort is stable, so lyrics that share a timestamp stay in
  // the order in which they appeared in the input.
  return parsedLyrics.sort((a, b) => a.time - b.time);
}
/**
* @param {string} content
* @returns {string}
*/
function trimContent(content) { function trimContent(content) {
let t = content.trim(); let t = content.trim();
return t.length < 1 ? content : t; return t.length < 1 ? content : t;