SubtitlesParser
SubtitlesParser copied to clipboard
Stream is not in a valid Youtube XML format
Calling YtXmlFormatParser.ParseStream
throws System.ArgumentException: 'Stream is not in a valid Youtube XML format'.
The code is as follows
// Repro: feed the raw YouTube caption XML (held in `subtitles`) to the YouTube XML parser.
List<SubtitlesParser.Classes.SubtitleItem> subtitleItems;
var ytSubtitlesParser = new SubtitlesParser.Classes.Parsers.YtXmlFormatParser();

// Encode the caption text as UTF-8 so the parser can read it back with the same encoding.
byte[] utf8Payload = Encoding.UTF8.GetBytes(subtitles);
var captionStream = new MemoryStream(utf8Payload);
using (captionStream)
{
    // This is the call reported to throw System.ArgumentException.
    subtitleItems = ytSubtitlesParser.ParseStream(captionStream, Encoding.UTF8);
}
The YouTube captions are attached: yt-video-oPnDOxMXlUc.zip
/// <summary>
/// Converts YouTube caption XML made of <c>&lt;p t="..." d="..."&gt;text&lt;/p&gt;</c> cues
/// into SubRip (SRT) text.
/// </summary>
/// <param name="ytSubtitles">
/// Raw caption XML. The <c>t</c> (start) and <c>d</c> (duration) attributes are
/// interpreted as whole milliseconds.
/// </param>
/// <returns>
/// One SRT entry per matched cue: a 1-based sequence number, a
/// <c>hh:mm:ss,fff --&gt; hh:mm:ss,fff</c> time range, the cue text, and a blank line.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="ytSubtitles"/> is null.</exception>
/// <exception cref="FormatException">No cue could be found in <paramref name="ytSubtitles"/>.</exception>
private string ConvertYouTubeXmlToSrtFormat(string ytSubtitles)
{
    if (ytSubtitles == null)
    {
        throw new ArgumentNullException(nameof(ytSubtitles));
    }

    // Singleline lets '.' span line breaks, so multi-line cue text is captured whole.
    const string cuePattern = "<p\\st=\"(?<timestamp>\\d+)\"\\sd=\"(?<duration>\\d+)\">(?<text>.*?)</p>";

    // The static Regex API reuses the framework's pattern cache instead of
    // constructing a new Regex instance on every call.
    var cues = Regex.Matches(ytSubtitles, cuePattern, RegexOptions.Singleline);
    if (cues.Count == 0)
    {
        throw new FormatException("Failed to extract subtitles");
    }

    var invariant = System.Globalization.CultureInfo.InvariantCulture;
    var srtWriter = new StringBuilder(cues.Count * 50);
    var sequenceNumber = 0;

    foreach (Match cue in cues)
    {
        sequenceNumber++;

        var start = new TimeSpan(0, 0, 0, 0, int.Parse(cue.Groups["timestamp"].Value, invariant));
        var length = new TimeSpan(0, 0, 0, 0, int.Parse(cue.Groups["duration"].Value, invariant));
        var end = start + length;

        srtWriter.AppendLine(sequenceNumber.ToString(invariant)); // sequence number
        srtWriter.AppendLine($"{start:hh\\:mm\\:ss\\,fff} --> {end:hh\\:mm\\:ss\\,fff}"); // timestamps

        // The cue text is XML-escaped (&amp;, &#39;, ...); SRT is plain text, so decode it.
        srtWriter.AppendLine(System.Net.WebUtility.HtmlDecode(cue.Groups["text"].Value)); // text
        srtWriter.AppendLine();
    }

    return srtWriter.ToString();
}