Did YouTube change its page structure again?
After #315, it seemed to work for a few days, but this week the regular expression
string functNamePattern = @"\""signature"",\s?([a-zA-Z0-9\$]+)\("; in Decipherer.cs
returns an empty string.
Is anyone else running into the same problem?
https://github.com/flagbug/YoutubeExtractor/issues/313#issuecomment-419663362
I don't know how others fixed it. In my case, the function name could not be found, so I combined two solutions: #313 and https://github.com/i3arnon/libvideo/commit/85dba890c50221d7fd4a27f37a60b5ac316cf601#diff-064c53a6b75fe468598d6396ede2b27a
I changed
string functNamePattern = @"\""signature"",\s?([a-zA-Z0-9\$]+)\(";
to
string functNamePattern = @"(\w+)&&(\w+)\.set\(\w+,(\w+)\(\1\)\);return\s+\2";
and changed
string funcPattern = @"(?!h\.)" + @funcName + @"=function\(\w+\)\{.*?\}"; //Escape funcName string
to
string funcPattern = @"(?!h\.)" + @funcName + @"(\w+)\s*=\s*function\(\s*(\w+)\s*\)\s*{\s*\2\s*=\s*\2\.split\(\""\""\)\s*;(.+)return\s*\2\.join\(\""\""\)\s*}\s*;"; //Escape funcName string
It works for me now; I hope it helps you too, and that YouTube won't change the structure again.
According to youtube-dl, these are the new regexes that need to be added.
@sameer do you know where to update this?
@sameer do you know where to update this?
No but I can look.
@kelvinRosa Just look at the pull request "Changes to DownloadUrlResolver #333". I hope I have covered the changes done by youtube-dl.
My solution.
-
DownloadUrlResolver: ` public static class DownloadUrlResolver { private const string RateBypassFlag = "ratebypass"; private const string SignatureQuery = "sig";//signature
/// <summary>
/// Decrypts the signature in the <see cref="VideoInfo.DownloadUrl" /> property and sets it
/// to the decrypted URL. Use this method, if you have decryptSignature in the <see
/// cref="GetDownloadUrls" /> method set to false.
/// </summary>
/// <param name="videoInfo">The video info whose download URL should be decrypted.</param>
/// <exception cref="ArgumentNullException">
/// The <paramref name="videoInfo" /> parameter is <c>null</c>.
/// </exception>
/// <exception cref="YoutubeParseException">
/// There was an error while deciphering the signature.
/// </exception>
public static void DecryptDownloadUrl(VideoInfo videoInfo)
{
    if (videoInfo == null)
    {
        throw new ArgumentNullException(nameof(videoInfo));
    }

    // Capture the signature value only: stop at the next '&' (or the end of the URL).
    // A greedy "(.+)&" would swallow every following parameter up to the last '&' and
    // would not match at all when the signature is the final parameter.
    var regex = new Regex("&" + SignatureQuery + "=([^&]+)");
    string downloadUrl = videoInfo.DownloadUrl;
    Match match = regex.Match(downloadUrl);

    if (!match.Success)
    {
        // No signature parameter present; the URL is left untouched.
        return;
    }

    Group signatureGroup = match.Groups[1];
    string decrypted;

    try
    {
        decrypted = GetDecipheredSignature(videoInfo.HtmlPlayerVersion, signatureGroup.Value);
    }
    catch (Exception ex)
    {
        throw new YoutubeParseException("Could not decipher signature", ex);
    }

    // Splice the deciphered value in by position so that characters such as '$' in the
    // result are never interpreted as regex replacement tokens.
    videoInfo.DownloadUrl = downloadUrl.Substring(0, signatureGroup.Index)
                            + decrypted
                            + downloadUrl.Substring(signatureGroup.Index + signatureGroup.Length);
    videoInfo.RequiresDecryption = false;
}

/// <summary>
/// Gets a list of <see cref="VideoInfo" />s for the specified URL.
/// </summary>
/// <param name="videoUrl">The URL of the YouTube video.</param>
/// <param name="decryptSignature">
/// A value indicating whether the video signatures should be decrypted or not. Decrypting
/// consists of a HTTP request for each <see cref="VideoInfo" />, so you may want to set
/// this to false and call <see cref="DecryptDownloadUrl" /> on your selected <see
/// cref="VideoInfo" /> later.
/// </param>
/// <returns>A list of <see cref="VideoInfo" />s that can be used to download the video.</returns>
/// <exception cref="ArgumentNullException">
/// The <paramref name="videoUrl" /> parameter is <c>null</c>.
/// </exception>
/// <exception cref="ArgumentException">
/// The <paramref name="videoUrl" /> parameter is not a valid YouTube URL.
/// </exception>
/// <exception cref="VideoNotAvailableException">The video is not available.</exception>
/// <exception cref="WebException">
/// An error occurred while downloading the YouTube page html.
/// </exception>
/// <exception cref="YoutubeParseException">The Youtube page could not be parsed.</exception>
public static IEnumerable<VideoInfo> GetDownloadUrls(string videoUrl, bool decryptSignature = true)
{
    if (videoUrl == null)
    {
        throw new ArgumentNullException(nameof(videoUrl));
    }

    bool isYoutubeUrl = TryNormalizeYoutubeUrl(videoUrl, out videoUrl);

    if (!isYoutubeUrl)
    {
        throw new ArgumentException("URL is not a valid youtube URL!");
    }

    try
    {
        var json = LoadJson(videoUrl);

        string videoTitle = GetVideoTitle(json);

        IEnumerable<ExtractionInfo> downloadUrls = ExtractDownloadUrls(json);

        // Materialize once: the list is mutated below and then handed to the caller.
        IEnumerable<VideoInfo> infos = GetVideoInfos(downloadUrls, videoTitle).ToList();

        string htmlPlayerVersion = GetHtml5PlayerVersion(json);

        foreach (VideoInfo info in infos)
        {
            info.HtmlPlayerVersion = htmlPlayerVersion;

            if (decryptSignature && info.RequiresDecryption)
            {
                DecryptDownloadUrl(info);
            }
        }

        return infos;
    }
    // Network and availability errors propagate unchanged; everything else is reported
    // as a parse failure.
    catch (Exception ex) when (!(ex is WebException) && !(ex is VideoNotAvailableException))
    {
        ThrowYoutubeParseException(ex, videoUrl);
    }

    return null; // Will never happen, but the compiler requires it
}
#if PORTABLE
/// <summary>
/// Runs <see cref="GetDownloadUrls" /> with the same arguments via <c>Task.Run</c> and
/// returns the resulting task.
/// </summary>
/// <param name="videoUrl">The URL of the YouTube video.</param>
/// <param name="decryptSignature">Forwarded unchanged to <see cref="GetDownloadUrls" />.</param>
/// <returns>A task producing the result of <see cref="GetDownloadUrls" />.</returns>
public static System.Threading.Tasks.Task<IEnumerable<VideoInfo>> GetDownloadUrlsAsync(string videoUrl, bool decryptSignature = true)
    => System.Threading.Tasks.Task.Run(() => GetDownloadUrls(videoUrl, decryptSignature));
#endif
/// <summary>
/// Normalizes the given YouTube URL to the format http://youtube.com/watch?v={youtube-id}
/// and returns whether the normalization was successful or not.
/// </summary>
/// <param name="url">The YouTube URL to normalize.</param>
/// <param name="normalizedUrl">
/// The normalized YouTube URL, or <c>null</c> when normalization fails.
/// </param>
/// <returns>
/// <c>true</c>, if the normalization was successful; <c>false</c>, if the URL is invalid
/// (including <c>null</c>, blank, or malformed input).
/// </returns>
public static bool TryNormalizeYoutubeUrl(string url, out string normalizedUrl)
{
    normalizedUrl = null;

    // A Try-method must report failure through its return value, not by throwing.
    if (string.IsNullOrWhiteSpace(url))
    {
        return false;
    }

    url = url.Trim();

    // Rewrite the short, "www." and embed forms into the plain watch form.
    url = url.Replace("youtu.be/", "youtube.com/watch?v=");
    url = url.Replace("www.youtube", "youtube");
    url = url.Replace("youtube.com/embed/", "youtube.com/watch?v=");

    if (url.Contains("/v/"))
    {
        // Uri.TryCreate instead of "new Uri": input without a scheme (or otherwise
        // malformed) would make the constructor throw UriFormatException.
        Uri parsed;

        if (!Uri.TryCreate(url, UriKind.Absolute, out parsed))
        {
            return false;
        }

        url = "http://youtube.com" + parsed.AbsolutePath.Replace("/v/", "/watch?v=");
    }

    url = url.Replace("/watch#", "/watch?");

    IDictionary<string, string> query = HttpHelper.ParseQueryString(url);

    string v;

    // An absent or empty "v" parameter cannot identify a video.
    if (!query.TryGetValue("v", out v) || string.IsNullOrEmpty(v))
    {
        return false;
    }

    normalizedUrl = "http://youtube.com/watch?v=" + v;

    return true;
}
/*
private static IEnumerable<ExtractionInfo> ExtractDownloadUrls(JObject json)
{
string[] splitByUrls = GetStreamMap(json).Split(',');
string[] adaptiveFmtSplitByUrls = GetAdaptiveStreamMap(json).Split(',');
splitByUrls = splitByUrls.Concat(adaptiveFmtSplitByUrls).ToArray();
foreach (string s in splitByUrls)
{
IDictionary<string, string> queries = HttpHelper.ParseQueryString(s);
string url;
bool requiresDecryption = false;
if (queries.ContainsKey("s") || queries.ContainsKey("sig"))
{
requiresDecryption = queries.ContainsKey("s");
string signature = queries.ContainsKey("s") ? queries["s"] : queries["sig"];
url = string.Format("{0}&{1}={2}", queries["url"], SignatureQuery, signature);
string fallbackHost = queries.ContainsKey("fallback_host") ? "&fallback_host=" + queries["fallback_host"] : String.Empty;
url += fallbackHost;
}
else
{
url = queries["url"];
}
url = HttpHelper.UrlDecode(url);
url = HttpHelper.UrlDecode(url);
IDictionary<string, string> parameters = HttpHelper.ParseQueryString(url);
if (!parameters.ContainsKey(RateBypassFlag))
url += string.Format("&{0}={1}", RateBypassFlag, "yes");
yield return new ExtractionInfo { RequiresDecryption = requiresDecryption, Uri = new Uri(url) };
}
}
*/
//
/// <summary>
/// Builds the list of <see cref="ExtractionInfo" /> entries from the tokens returned by
/// <see cref="GetStreamMap" /> and <see cref="GetAdaptiveStreamMap" />, in that order.
/// </summary>
/// <param name="json">The player configuration object.</param>
/// <returns>The collected extraction entries.</returns>
private static IEnumerable<ExtractionInfo> ExtractDownloadUrls(JObject json)
{
    var collected = new List<ExtractionInfo>();

    // Both maps are fetched before any entry is extracted, regular map first.
    JToken[] streamMaps = { GetStreamMap(json), GetAdaptiveStreamMap(json) };

    foreach (JToken streamMap in streamMaps)
    {
        ExtractInfo(collected, streamMap);
    }

    return collected;
}
/// <summary>
/// Appends one <see cref="ExtractionInfo" /> to <paramref name="info" /> for every entry of
/// <paramref name="items" /> that carries a usable URL.
/// </summary>
/// <param name="info">The list that receives the extracted entries.</param>
/// <param name="items">
/// The stream entries to read; each is expected to expose either a "cipher" or a "url"
/// member. May be <c>null</c>, in which case nothing is added.
/// </param>
private static void ExtractInfo(List<ExtractionInfo> info, JToken items)
{
    if (items == null)
    {
        return;
    }

    foreach (var item in items)
    {
        string url;
        bool requiresDecryption = false;
        JToken cipher = item["cipher"];

        if (cipher != null)
        {
            IDictionary<string, string> queries = HttpHelper.ParseQueryString(cipher.ToString());

            // TryGetValue leaves url null when the cipher has no "url" component.
            queries.TryGetValue("url", out url);

            // "s" is a signature that still has to be deciphered; "sig" is used as-is.
            string signature;
            if (queries.TryGetValue("s", out signature))
            {
                requiresDecryption = true;
            }
            else
            {
                queries.TryGetValue("sig", out signature);
            }

            // Without a signature the bare URL is kept instead of an empty string,
            // which "new Uri" below would reject.
            if (url != null && signature != null)
            {
                url = string.Format("{0}&{1}={2}", url, SignatureQuery, signature);

                string fallbackHost;
                if (queries.TryGetValue("fallback_host", out fallbackHost))
                {
                    url += "&fallback_host=" + fallbackHost;
                }
            }
        }
        else
        {
            JToken directUrl = item["url"];
            url = directUrl == null ? null : directUrl.ToString();
        }

        // Skip entries that carry no URL at all rather than failing the whole extraction.
        if (string.IsNullOrEmpty(url))
        {
            continue;
        }

        url = HttpHelper.UrlDecode(url);

        IDictionary<string, string> parameters = HttpHelper.ParseQueryString(url);
        if (!parameters.ContainsKey(RateBypassFlag))
        {
            url += string.Format("&{0}={1}", RateBypassFlag, "yes");
        }

        info.Add(new ExtractionInfo { RequiresDecryption = requiresDecryption, Uri = new Uri(url) });
    }
}
//
/*
private static string GetAdaptiveStreamMap(JObject json)
{
JToken streamMap = json["args"]["adaptive_fmts"];
// bugfix: adaptive_fmts is missing in some videos, use url_encoded_fmt_stream_map instead
if (streamMap == null)
{
streamMap = json["args"]["url_encoded_fmt_stream_map"];
}
return streamMap.ToString();
}
*/
/// <summary>
/// Reads <c>args.player_response</c> from <paramref name="json" />, parses its text as JSON
/// and returns the <c>streamingData.adaptiveFormats</c> token.
/// </summary>
/// <param name="json">The player configuration object.</param>
/// <returns>
/// The <c>adaptiveFormats</c> token, or <c>null</c> when <c>player_response</c>,
/// <c>streamingData</c> or <c>adaptiveFormats</c> is absent.
/// </returns>
private static JToken GetAdaptiveStreamMap(JObject json)
{
    JToken rawResponse = json["args"]["player_response"];
    if (rawResponse == null)
    {
        return null;
    }

    string responseText = rawResponse.ToString();
    if (responseText == null)
    {
        return null;
    }

    JObject parsedResponse = JObject.Parse(responseText);
    return parsedResponse?["streamingData"]?["adaptiveFormats"];
}
/// <summary>
/// Forwards to <c>Decipherer.DecipherWithVersion</c>, passing the signature first and the
/// player version second.
/// </summary>
/// <param name="htmlPlayerVersion">The player version handed to the decipherer.</param>
/// <param name="signature">The signature to decipher.</param>
/// <returns>The value returned by <c>Decipherer.DecipherWithVersion</c>.</returns>
private static string GetDecipheredSignature(string htmlPlayerVersion, string signature)
    => Decipherer.DecipherWithVersion(signature, htmlPlayerVersion);
/*
private static string GetHtml5PlayerVersion(JObject json)
{
var regex = new Regex(@"player(.+?).js");
string js = json["assets"]["js"].ToString();
return regex.Match(js).Result("$1");
}
*/
/// <summary>
/// Extracts the player version from the script path stored at <c>assets.js</c>: the text
/// between <c>player-</c> or <c>player_</c> and the first following <c>.js</c>.
/// </summary>
/// <param name="json">The player configuration object.</param>
/// <returns>The captured version string.</returns>
/// <exception cref="FormatException">
/// The script path does not contain a recognizable player version.
/// </exception>
private static string GetHtml5PlayerVersion(JObject json)
{
    // "[-_]" rather than "[-|_]": inside a character class '|' is a literal pipe, not an
    // alternation. The dot before "js" is escaped so it only matches a real '.'.
    var regex = new Regex(@"player[-_](.+?)\.js");

    string js = json["assets"]["js"].ToString();

    Match match = regex.Match(js);

    // Match.Result throws NotSupportedException on a failed match; fail with a message
    // that names the actual problem instead.
    if (!match.Success)
    {
        throw new FormatException("Could not find the player version in script path: " + js);
    }

    return match.Groups[1].Value;
}
/*
private static string GetStreamMap(JObject json)
{
JToken streamMap = json["args"]["url_encoded_fmt_stream_map"];
string streamMapString = streamMap == null ? null : streamMap.ToString();
if (streamMapString == null || streamMapString.Contains("been+removed"))
{
throw new VideoNotAvailableException("Video is removed or has an age restriction.");
}
return streamMapString;
}
*/
/// <summary>
/// Reads <c>args.player_response</c> from <paramref name="json" />, parses its text as JSON
/// and returns the <c>streamingData.formats</c> token (the counterpart of the
/// <c>adaptiveFormats</c> token returned by <see cref="GetAdaptiveStreamMap" />).
/// </summary>
/// <param name="json">The player configuration object.</param>
/// <returns>
/// The <c>formats</c> token, or <c>null</c> when <c>streamingData</c> or <c>formats</c> is
/// absent.
/// </returns>
/// <exception cref="VideoNotAvailableException">
/// <c>player_response</c> is missing or contains the text "been+removed".
/// </exception>
private static JToken GetStreamMap(JObject json)
{
    JToken rawResponse = json["args"]["player_response"];

    string responseText = rawResponse == null ? null : rawResponse.ToString();

    if (responseText == null || responseText.Contains("been+removed"))
    {
        // VideoNotAvailableException (not bare Exception) so that GetDownloadUrls rethrows
        // it as-is instead of reporting a page-structure parse failure.
        throw new VideoNotAvailableException("Video is removed or has an age restriction.");
    }

    JObject playerResponse = JObject.Parse(responseText);

    // "formats", not "adaptiveFormats": returning the adaptive list here would make
    // ExtractDownloadUrls process the adaptive streams twice and never see this list.
    return playerResponse?["streamingData"]?["formats"];
}
/// <summary>
/// Creates one <see cref="VideoInfo" /> per extraction entry, using the entry's
/// <c>itag</c> query parameter as the format code.
/// </summary>
/// <param name="extractionInfos">The extracted download entries.</param>
/// <param name="videoTitle">The title assigned to every resulting <see cref="VideoInfo" />.</param>
/// <returns>The resulting video infos, in the order of <paramref name="extractionInfos" />.</returns>
private static IEnumerable<VideoInfo> GetVideoInfos(IEnumerable<ExtractionInfo> extractionInfos, string videoTitle)
{
    var downLoadInfos = new List<VideoInfo>();

    foreach (ExtractionInfo extractionInfo in extractionInfos)
    {
        string itag = HttpHelper.ParseQueryString(extractionInfo.Uri.Query)["itag"];

        int formatCode = int.Parse(itag);

        // Start from the matching entry in VideoInfo.Defaults when there is one; otherwise
        // fall back to an info that only knows its format code.
        VideoInfo knownFormat = VideoInfo.Defaults.SingleOrDefault(videoInfo => videoInfo.FormatCode == formatCode);

        VideoInfo info = knownFormat != null
            ? new VideoInfo(knownFormat)
            : new VideoInfo(formatCode);

        // Set for known and unknown formats alike: dropping RequiresDecryption for an
        // unknown format would leave its URL with a signature that is never deciphered.
        info.DownloadUrl = extractionInfo.Uri.ToString();
        info.Title = videoTitle;
        info.RequiresDecryption = extractionInfo.RequiresDecryption;

        downLoadInfos.Add(info);
    }

    return downLoadInfos;
}
/// <summary>
/// Returns the text of <c>args.title</c>, or an empty string when that member is absent.
/// </summary>
/// <param name="json">The player configuration object.</param>
/// <returns>The title text, or <see cref="String.Empty" />.</returns>
private static string GetVideoTitle(JObject json)
{
    JToken titleToken = json["args"]["title"];

    if (titleToken == null)
    {
        return String.Empty;
    }

    return titleToken.ToString();
}
/// <summary>
/// Tells whether the page source contains the "watch-player-unavailable" container markup.
/// </summary>
/// <param name="pageSource">The downloaded page HTML.</param>
/// <returns><c>true</c> if the marker is present; otherwise <c>false</c>.</returns>
private static bool IsVideoUnavailable(string pageSource)
    => pageSource.Contains("<div id=\"watch-player-unavailable\">");
/// <summary>
/// Downloads the page at <paramref name="url" /> and parses the object literal assigned
/// to <c>ytplayer.config</c> in its source.
/// </summary>
/// <param name="url">The page URL to download.</param>
/// <returns>The parsed configuration object.</returns>
/// <exception cref="VideoNotAvailableException">
/// <see cref="IsVideoUnavailable" /> reports the page as unavailable.
/// </exception>
private static JObject LoadJson(string url)
{
    string html = HttpHelper.DownloadString(url);

    if (IsVideoUnavailable(html))
    {
        throw new VideoNotAvailableException();
    }

    // Lazy match: the capture ends at the first "};" after the assignment.
    Match configMatch = Regex.Match(html, @"ytplayer\.config\s*=\s*(\{.+?\});", RegexOptions.Multiline);

    return JObject.Parse(configMatch.Result("$1"));
}
/// <summary>
/// Always throws a <see cref="YoutubeParseException" /> whose message names
/// <paramref name="videoUrl" /> and that wraps <paramref name="innerException" />.
/// </summary>
/// <param name="innerException">The exception that caused the parse failure.</param>
/// <param name="videoUrl">The URL that was being parsed.</param>
private static void ThrowYoutubeParseException(Exception innerException, string videoUrl)
{
    string message = string.Concat(
        "Could not parse the Youtube page for URL ",
        videoUrl,
        "\n",
        "This may be due to a change of the Youtube page structure.\n",
        "Please report this bug at www.github.com/flagbug/YoutubeExtractor/issues");

    throw new YoutubeParseException(message, innerException);
}
/// <summary>
/// A download URI together with a flag saying whether its signature still has to be
/// deciphered.
/// </summary>
private class ExtractionInfo
{
    /// <summary>Gets or sets the download URI.</summary>
    public Uri Uri { get; set; }

    /// <summary>Gets or sets whether the URI's signature still needs deciphering.</summary>
    public bool RequiresDecryption { get; set; }
}
}
}`
-
Decipherer: `internal static class Decipherer {
    /// <summary>
    /// Downloads the player script for <paramref name="cipherVersion" />, derives a list of
    /// cipher operations from the script's signature function and applies them to
    /// <paramref name="cipher" />.
    /// </summary>
    /// <param name="cipher">The enciphered signature.</param>
    /// <param name="cipherVersion">The player version used to build the script URL.</param>
    /// <returns>The signature after all derived operations have been applied.</returns>
    /// <exception cref="NotImplementedException">
    /// The signature function could not be located in the player script.
    /// </exception>
    public static string DecipherWithVersion(string cipher, string cipherVersion)
    {
        string jsUrl = string.Format("http://s.ytimg.com/yts/jsbin/player_{0}.js", cipherVersion);
        string js = HttpHelper.DownloadString(jsUrl);

        // Find the signature function by its shape:
        //   NAME=function(a){a=a.split("");...return a.join("")}
        string functNamePattern = @"(\w+)=function\(\w+\){(\w+)=\2\.split\(\x22{2}\);.*?return\s+\2\.join\(\x22{2}\)}";

        var funcName = Regex.Match(js, functNamePattern).Groups[1].Value;

        if (string.IsNullOrEmpty(funcName))
        {
            throw new NotImplementedException("Couldn't find signature function.");
        }

        // Regex.Escape neutralizes every metacharacter (e.g. '$') wherever it occurs;
        // prefixing one backslash only handled a leading '$' and turned names such as
        // "a$b" into the unrelated pattern "\a$b".
        string funcPattern = @"(?!h\.)" + Regex.Escape(funcName) + @"=function\(\w+\)\{.*?\}";
        var funcBody = Regex.Match(js, funcPattern, RegexOptions.Singleline).Value; // Entire sig function
        var lines = funcBody.Split(';'); // Each statement in sig function

        var operations = new StringBuilder();

        foreach (var line in lines)
        {
            // Name of the helper function called in this statement, if any.
            string functionIdentifier = Regex.Match(line, @"\w+(?:.|\[)(\""?\w+(?:\"")?)\]?\(").Groups[1].Value;

            if (string.IsNullOrWhiteSpace(functionIdentifier))
            {
                continue;
            }

            string reReverse = string.Format(@"{0}:\bfunction\b\(\w+\)", functionIdentifier); // reverse (one parameter)
            string reSlice = string.Format(@"{0}:\bfunction\b\([a],b\).(\breturn\b)?.?\w+\.", functionIdentifier); // slice (return or not)
            string reSwap = string.Format(@"{0}:\bfunction\b\(\w+\,\w\).\bvar\b.\bc=a\b", functionIdentifier); // char swap

            // The helper's definition in the script decides the operation kind; the numeric
            // argument, when there is one, is read from the calling statement.
            if (Regex.Match(js, reSlice).Success)
            {
                Match argument = Regex.Match(line, @"\(\w+,(?<index>\d+)\)");
                if (argument.Success)
                {
                    operations.Append('s').Append(argument.Groups["index"].Value).Append(' ');
                }
            }
            else if (Regex.Match(js, reSwap).Success)
            {
                Match argument = Regex.Match(line, @"\(\w+,(?<index>\d+)\)");
                if (argument.Success)
                {
                    operations.Append('w').Append(argument.Groups["index"].Value).Append(' ');
                }
            }
            else if (Regex.Match(js, reReverse).Success)
            {
                operations.Append("r ");
            }
        }

        return DecipherWithOperations(cipher, operations.ToString().Trim());
    }

    /// <summary>
    /// Applies a single operation to <paramref name="cipher" />: <c>r</c> reverses it,
    /// <c>w{n}</c> swaps its first character with another one, <c>s{n}</c> drops its first
    /// <c>n</c> characters.
    /// </summary>
    /// <exception cref="NotImplementedException">The operation letter is not known.</exception>
    private static string ApplyOperation(string cipher, string op)
    {
        switch (op[0])
        {
            case 'r': // reverse
                return new string(cipher.ToCharArray().Reverse().ToArray());

            case 'w': // swap
                return SwapFirstChar(cipher, GetOpIndex(op));

            case 's': // slice
                {
                    int index = GetOpIndex(op);

                    // Dropping more characters than exist leaves nothing; Substring alone
                    // would throw ArgumentOutOfRangeException.
                    return index >= cipher.Length ? string.Empty : cipher.Substring(index);
                }

            default:
                throw new NotImplementedException("Couldn't find cipher operation.");
        }
    }

    /// <summary>
    /// Applies the space-separated <paramref name="operations" /> to
    /// <paramref name="cipher" /> from left to right.
    /// </summary>
    private static string DecipherWithOperations(string cipher, string operations)
    {
        return operations.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries)
            .Aggregate(cipher, ApplyOperation);
    }

    /// <summary>Parses the number that follows the operation letter.</summary>
    private static int GetOpIndex(string op)
    {
        string parsed = new Regex(@".(\d+)").Match(op).Result("$1");
        return Int32.Parse(parsed);
    }

    /// <summary>
    /// Swaps the first character of <paramref name="cipher" /> with the character at
    /// <paramref name="index" />, wrapping the index around the cipher length.
    /// </summary>
    private static string SwapFirstChar(string cipher, int index)
    {
        if (cipher.Length == 0)
        {
            return cipher;
        }

        // Wrap the index so an operand at or beyond the cipher length cannot index out of
        // range. NOTE(review): believed to mirror the player script's "b % a.length" in
        // its swap helper - confirm against a current player script.
        int position = index % cipher.Length;

        var builder = new StringBuilder(cipher);
        builder[0] = cipher[position];
        builder[position] = cipher[0];

        return builder.ToString();
    }
}`
Based on YoutubeExplode.