6 #include "jsfunctions.h"
8 #include "videodefinition.h"
// Character-class body (letters, digits, '$' and '_') spliced between '[' and ']'
// when this file builds regular expressions that match JavaScript identifiers.
16 static const QString jsNameChars = "a-zA-Z0-9\\$_";
// Creates a YTVideo for the given video id, owned by `parent`.
// definitionCode and elIndex start at 0; the ageGate and loadingStreamUrl flags
// start cleared.
19 YTVideo::YTVideo(const QString &videoId, QObject *parent)
20 : QObject(parent), videoId(videoId), definitionCode(0), elIndex(0), ageGate(false),
21 loadingStreamUrl(false) {}
// Entry point for resolving this video's stream URL.
// loadingStreamUrl marks a resolution in progress; a second call while it is set
// is logged. NOTE(review): presumably that case returns early — confirm.
23 void YTVideo::loadStreamUrl() {
24 if (loadingStreamUrl) {
25 qDebug() << "Already loading stream URL for" << videoId;
// Flag the load as running and forget any previously scraped web page.
28 loadingStreamUrl = true;
31 webPageLoaded = false;
// Requests the get_video_info resource for videoId.
// elIndex selects which "el" query variant from elTypes is tried on this call:
//  - elIndex == elTypes.size(): a special "embedded" request built from query items
//  - elIndex past that:         give up and report "Cannot get video info"
//  - otherwise:                 the plain URL with elTypes.at(elIndex) appended
// The reply's data is handed to gotVideoInfo(); errors go to emitError().
37 void YTVideo::getVideoInfo() {
38 static const QStringList elTypes = {"&el=embedded", "&el=detailpage", "&el=vevo", ""};
// One step past the last list entry: last-resort embedded request.
41 if (elIndex == elTypes.size()) {
42 qDebug() << "Trying special embedded el param";
43 url = QUrl("https://www.youtube.com/get_video_info");
45 q.addQueryItem("video_id", videoId);
46 q.addQueryItem("el", "embedded");
47 q.addQueryItem("gl", "US");
48 q.addQueryItem("hl", "en");
49 q.addQueryItem("eurl", "https://youtube.googleapis.com/v/" + videoId);
50 q.addQueryItem("asv", "3");
51 q.addQueryItem("sts", "1588");
// The == case is handled above, so this branch is only reached when elIndex
// is greater than elTypes.size(): every variant has been exhausted.
53 } else if (elIndex > elTypes.size() - 1) {
54 qDebug() << "Cannot get video info";
56 // no video info file, but we can try loading the "urlmap" from the web page
59 emitError("Cannot get video info");
63 // qDebug() << "Trying el param:" << elTypes.at(elIndex) << elIndex;
// Regular attempt: %1 is the video id, %2 the current "el" fragment (may be empty).
64 url = QUrl(QString("https://www.youtube.com/"
65 "get_video_info?video_id=%1%2&ps=default&eurl=&gl=US&hl=en")
66 .arg(videoId, elTypes.at(elIndex)));
// Issue the request and route its signals to this object's slots.
69 QObject *reply = HttpUtils::yt().get(url);
70 connect(reply, SIGNAL(data(QByteArray)), SLOT(gotVideoInfo(QByteArray)));
71 connect(reply, SIGNAL(error(QString)), SLOT(emitError(QString)));
73 // see you in gotVideoInfo...
// Slot receiving the body of a get_video_info response.
// Steps visible here:
//  1. extract the percent-encoded "player_response" JSON and, when it has a
//     "streamingData" object, record every format's URL in urlMap keyed by itag;
//  2. extract the video token;
//  3. extract the format/URL map and pass it to parseFmtUrlMap().
// When the token or the map cannot be found another "el" variant is tried.
76 void YTVideo::gotVideoInfo(const QByteArray &bytes) {
77 QString videoInfo = QString::fromUtf8(bytes);
78 // qDebug() << "videoInfo" << videoInfo;
80 // get player_response
81 static const QRegExp playerResponseRE("&player_response=([^&]+)");
82 if (playerResponseRE.indexIn(videoInfo) != -1) {
83 QString playerResponse = playerResponseRE.cap(1);
// The captured value is percent-encoded JSON; decode it before parsing.
84 QByteArray playerResponseUtf8 = QByteArray::fromPercentEncoding(playerResponse.toUtf8());
85 // qDebug() << "player_response" << playerResponseUtf8;
86 QJsonDocument doc = QJsonDocument::fromJson(playerResponseUtf8);
87 QJsonObject obj = doc.object();
88 if (obj.contains("streamingData")) {
// Helper applied to both the "formats" and "adaptiveFormats" arrays below.
89 auto parseFormats = [this](const QJsonArray &formats) {
90 for (const QJsonValue &format : formats) {
91 QJsonObject formatObj = format.toObject();
92 int itag = formatObj["itag"].toInt();
93 QString url = formatObj["url"].toString();
// A format may carry a "cipher" string instead of a plain URL; its "url" item
// replaces `url` and its "s" item, when present, is run through decryptSignature().
// NOTE(review): `q` is presumably a query object built from `cipher` — confirm.
95 QString cipher = formatObj["cipher"].toString();
97 qDebug() << "Cipher is " << q.toString();
98 url = q.queryItemValue("url").trimmed();
99 // while (url.contains('%'))
100 url = QByteArray::fromPercentEncoding(url.toUtf8());
101 if (q.hasQueryItem("s")) {
102 QString s = q.queryItemValue("s");
103 qDebug() << "s is" << s;
104 s = decryptSignature(s);
106 qDebug() << "Added signature" << s;
112 // qDebug() << "player_response format" << itag << url;
// Formats that ended up without a URL are not recorded.
113 if (!url.isEmpty()) urlMap.insert(itag, url);
116 QJsonObject streamingDataObj = obj["streamingData"].toObject();
117 // qDebug() << "Found streamingData" << streamingDataObj;
118 parseFormats(streamingDataObj["formats"].toArray());
119 parseFormats(streamingDataObj["adaptiveFormats"].toArray());
// Video token: the pattern is supplied by JsFunctions.
125 static const QRegExp videoTokeRE(JsFunctions::instance()->videoTokenRE());
126 if (videoTokeRE.indexIn(videoInfo) == -1) {
127 qDebug() << "Cannot get token. Trying next el param" << videoTokeRE.pattern() << videoInfo;
128 // Don't panic! We're gonna try another magic "el" param
134 QString videoToken = videoTokeRE.cap(1);
// The token may be percent-encoded more than once, so decode until no '%' is left.
// NOTE(review): confirm this terminates when a decode pass yields a literal '%'.
135 while (videoToken.contains('%'))
136 videoToken = QByteArray::fromPercentEncoding(videoToken.toLatin1());
137 qDebug() << "videoToken" << videoToken;
138 this->videoToken = videoToken;
// Format/URL map: the pattern is supplied by JsFunctions.
141 static const QRegExp fmtMapRE(JsFunctions::instance()->videoInfoFmtMapRE());
142 if (fmtMapRE.indexIn(videoInfo) == -1) {
143 qDebug() << "Cannot get urlMap. Trying next el param";
144 // Don't panic! We're gonna try another magic "el" param
149 QString fmtUrlMap = fmtMapRE.cap(1);
150 // qDebug() << "got fmtUrlMap" << fmtUrlMap;
151 fmtUrlMap = QByteArray::fromPercentEncoding(fmtUrlMap.toUtf8());
154 if (urlMap.isEmpty()) {
160 qDebug() << "Got token and urlMap" << elIndex << videoToken << fmtUrlMap;
161 parseFmtUrlMap(fmtUrlMap);
// Parses a comma-separated list of stream descriptors, each a '&'-separated list
// of key=value parameters, and records one URL per format code in urlMap.
// It then picks the stream to play: the user's maximum definition if available,
// otherwise the first available definition earlier in the definitions list.
// Emits errorStreamUrl when nothing usable is found.
164 void YTVideo::parseFmtUrlMap(const QString &fmtUrlMap) {
166 const VideoDefinition &definition = YT3::instance().maxVideoDefinition();
168 // qDebug() << "fmtUrlMap" << fmtUrlMap;
169 const QVector<QStringRef> formatUrls = fmtUrlMap.splitRef(',', QString::SkipEmptyParts);
171 for (const QStringRef &formatUrl : formatUrls) {
172 // qDebug() << "formatUrl" << formatUrl;
173 const QVector<QStringRef> urlParams = formatUrl.split('&', QString::SkipEmptyParts);
174 // qDebug() << "urlParams" << urlParams;
// Collect itag (format code), url, signature and the signature parameter name (sp).
180 for (const QStringRef &urlParam : urlParams) {
181 qDebug() << "urlParam" << urlParam;
// NOTE(review): this matches any parameter whose name begins with "sp", not only
// "sp=" — confirm that is intended.
182 if (sp.isNull() && urlParam.startsWith(QLatin1String("sp"))) {
183 int separator = urlParam.indexOf('=');
184 sp = urlParam.mid(separator + 1);
186 if (urlParam.startsWith(QLatin1String("itag="))) {
187 int separator = urlParam.indexOf('=');
188 format = urlParam.mid(separator + 1).toInt();
189 } else if (urlParam.startsWith(QLatin1String("url="))) {
190 int separator = urlParam.indexOf('=');
191 url = QByteArray::fromPercentEncoding(urlParam.mid(separator + 1).toUtf8());
192 } else if (urlParam.startsWith(QLatin1String("sig="))) {
// "sig" is stored after percent-decoding only, without decryption.
193 int separator = urlParam.indexOf('=');
194 sig = QByteArray::fromPercentEncoding(urlParam.mid(separator + 1).toUtf8());
195 } else if (urlParam.startsWith(QLatin1String("s="))) {
// "s" must be decrypted, which is only attempted once the web page has been
// scraped or the video is age-gated.
196 if (webPageLoaded || ageGate) {
197 int separator = urlParam.indexOf('=');
198 sig = QByteArray::fromPercentEncoding(urlParam.mid(separator + 1).toUtf8());
199 sig = decryptSignature(sig);
// NOTE(review): `sig` was overwritten on the previous line, so this fallback is
// given the already-empty value rather than the original encrypted signature —
// looks unintended; confirm.
200 if (sig.isEmpty()) sig = JsFunctions::instance()->decryptSignature(sig);
201 if (sig.isEmpty()) qWarning() << "Empty signature";
// Descriptors lacking a format code or URL are skipped.
208 if (format == -1 || url.isNull()) continue;
// Append the signature, either under the fixed name "signature" or under the name
// given by sp. NOTE(review): presumably chosen by whether sp is set — confirm.
210 if (!sig.isEmpty()) {
212 url += QLatin1String("&signature=") + sig;
214 url += '&' + sp + '=' + sig;
217 if (!url.contains(QLatin1String("ratebypass"))) url += QLatin1String("&ratebypass=yes");
// Check whether this format is the preferred definition.
220 if (format == definition.getCode()) {
221 qDebug() << "Found format" << format;
222 if (definition.hasAudio()) {
223 // we found the exact match with an audio/video stream
224 saveDefinitionForUrl(url, definition);
// Video-only exact match: remember it and keep filling urlMap.
227 videoFormat = format;
229 urlMap.insert(format, url);
232 if (!webPageLoaded && !ageGate) {
237 if (videoFormat != 0) {
238 // exact match with video stream was found
239 const VideoDefinition &definition = VideoDefinition::forCode(videoFormat);
240 saveDefinitionForUrl(urlMap.value(videoFormat), definition);
// No exact match: walk the definitions list backwards, starting at the entry just
// before the preferred one (clamped to index 0), and use the first one in urlMap.
244 qDebug() << "available formats" << urlMap.keys();
245 const QVector<VideoDefinition> &definitions = VideoDefinition::getDefinitions();
246 int previousIndex = std::max(definitions.indexOf(definition) - 1, 0);
247 for (; previousIndex >= 0; previousIndex--) {
248 const VideoDefinition &previousDefinition = definitions.at(previousIndex);
249 qDebug() << "Testing format" << previousDefinition.getCode();
250 if (urlMap.contains(previousDefinition.getCode())) {
251 qDebug() << "Found format" << previousDefinition.getCode();
252 saveDefinitionForUrl(urlMap.value(previousDefinition.getCode()), previousDefinition);
257 emit errorStreamUrl(tr("Cannot get video stream for %1").arg(videoId));
// Requests the regular watch page for videoId so that it can be scraped.
// The query asks for the US/English page and sets has_verified and bpctr.
// The reply's data is handed to scrapeWebPage(); errors go to emitError().
260 void YTVideo::loadWebPage() {
261 QUrl url("https://www.youtube.com/watch");
263 q.addQueryItem("v", videoId);
264 q.addQueryItem("gl", "US");
265 q.addQueryItem("hl", "en");
266 q.addQueryItem("has_verified", "1");
267 q.addQueryItem("bpctr", "9999999999");
270 // QUrl url("https://www.youtube.com/embed/" + videoId);
272 qDebug() << "Loading webpage" << url;
273 QObject *reply = HttpUtils::yt().get(url);
274 connect(reply, SIGNAL(data(QByteArray)), SLOT(scrapeWebPage(QByteArray)));
275 connect(reply, SIGNAL(error(QString)), SLOT(emitError(QString)));
276 // see you in scrapeWebPage(QByteArray)
// Logs `message` as a warning and forwards it through the errorStreamUrl signal.
// Also used as the error slot for the HTTP replies created in this file.
279 void YTVideo::emitError(const QString &message) {
280 qWarning() << message;
281 emit errorStreamUrl(message);
// Slot receiving the HTML of the watch page.
// Checks for the age gate, extracts the format/URL maps embedded in the page,
// then locates the JavaScript player URL and requests it; the player source is
// handed to parseJsPlayer().
284 void YTVideo::scrapeWebPage(const QByteArray &bytes) {
285 webPageLoaded = true;
287 const QString html = QString::fromUtf8(bytes);
288 // qDebug() << "scrapeWebPage" << html;
// Age-gate detection: the pattern is supplied by JsFunctions.
290 static const QRegExp ageGateRE(JsFunctions::instance()->ageGateRE());
291 if (ageGateRE.indexIn(html) != -1) {
292 qDebug() << "Found ageGate";
299 // "\"url_encoded_fmt_stream_map\":\s*\"([^\"]+)\""
300 static const QRegExp fmtMapRE(JsFunctions::instance()->webPageFmtMapRE());
301 if (fmtMapRE.indexIn(html) != -1) {
302 fmtUrlMap = fmtMapRE.cap(1);
// The value is embedded in JSON, where '&' appears as the escape \u0026.
303 fmtUrlMap.replace("\\u0026", "&");
// Adaptive formats use the same syntax and are appended to the same
// comma-separated list.
306 QRegExp adaptiveFormatsRE("\"adaptive_fmts\":\\s*\"([^\"]+)\"");
307 if (adaptiveFormatsRE.indexIn(html) != -1) {
308 qDebug() << "Found adaptive_fmts";
309 if (!fmtUrlMap.isEmpty()) fmtUrlMap += ',';
310 fmtUrlMap += adaptiveFormatsRE.cap(1).replace("\\u0026", "&");
313 if (fmtUrlMap.isEmpty() && urlMap.isEmpty()) {
314 qWarning() << "Cannot get fmtUrlMap from video page. Trying next el";
// Locate the JavaScript player; the pattern is supplied by JsFunctions.
320 static const QRegExp jsPlayerRe(JsFunctions::instance()->jsPlayerRE());
321 if (jsPlayerRe.indexIn(html) != -1) {
322 QString jsPlayerUrl = jsPlayerRe.cap(1);
// Strip backslash escapes, then turn protocol-relative or site-relative paths
// into absolute https URLs.
323 jsPlayerUrl.remove('\\');
324 if (jsPlayerUrl.startsWith(QLatin1String("//"))) {
325 jsPlayerUrl = QLatin1String("https:") + jsPlayerUrl;
326 } else if (jsPlayerUrl.startsWith("/")) {
327 jsPlayerUrl = QLatin1String("https://youtube.com") + jsPlayerUrl;
329 // qDebug() << "jsPlayerUrl" << jsPlayerUrl;
// NOTE(review): jsPlayerIdRe is matched here but cap(1) below is read from
// jsPlayerRe, so jsPlayerId repeats the player URL capture — looks like a typo.
331 QRegExp jsPlayerIdRe("-(.+)\\.js");
332 jsPlayerIdRe.indexIn(jsPlayerUrl);
333 QString jsPlayerId = jsPlayerRe.cap(1);
// Download the player script.
335 QObject *reply = HttpUtils::yt().get(jsPlayerUrl);
336 connect(reply, SIGNAL(data(QByteArray)), SLOT(parseJsPlayer(QByteArray)));
337 connect(reply, SIGNAL(error(QString)), SLOT(emitError(QString)));
// Slot receiving the JavaScript player source.
// Keeps the source in jsPlayer, finds the name of the signature function using
// the candidate patterns supplied by JsFunctions, and captures that function
// (plus what it depends on) through captureFunction().
341 void YTVideo::parseJsPlayer(const QByteArray &bytes) {
342 jsPlayer = QString::fromUtf8(bytes);
343 // qDebug() << "jsPlayer" << jsPlayer;
345 // QRegExp funcNameRe("[\"']signature[\"']\\s*,\\s*([" + jsNameChars + "]+)\\(");
// Built once: each pattern template gets jsNameChars substituted via arg().
346 static const QVector<QRegExp> funcNameRes = [] {
347 QVector<QRegExp> res;
348 for (const QString &s : JsFunctions::instance()->signatureFunctionNameREs()) {
349 res << QRegExp(s.arg(jsNameChars));
// Try each candidate pattern against the player source.
353 for (const QRegExp &funcNameRe : funcNameRes) {
354 if (funcNameRe.indexIn(jsPlayer) == -1) {
355 qDebug() << "Cannot capture signature function name" << funcNameRe;
358 sigFuncName = funcNameRe.cap(1);
359 qDebug() << "Captures" << funcNameRe.captureCount() << funcNameRe.capturedTexts();
360 if (sigFuncName.isEmpty()) {
361 qDebug() << "Empty capture for" << funcNameRe;
364 captureFunction(sigFuncName, jsPlayer);
365 qDebug() << sigFunctions << sigObjects;
369 if (sigFuncName.isEmpty()) qDebug() << "Empty signature function name";
371 // parseFmtUrlMap(fmtUrlMap, true);
// Extracts the source text of the JavaScript function `name` from `js`, stores it
// in sigFunctions, then recursively captures every function it invokes and every
// object it references (via captureObject()).
// Three declaration forms are tried in order:
//   function name(args) {...}
//   var name = function(args) {...}
//   name = function(args) {...}   (preceded by one of , whitespace ; } . ) )
375 void YTVideo::captureFunction(const QString &name, const QString &js) {
376 qDebug() << __PRETTY_FUNCTION__ << name;
// Argument list followed by a body; the body pattern [^\}]+ ends at the first '}'.
377 const QString argsAndBody =
378 QLatin1String("\\s*\\([") + jsNameChars + QLatin1String(",\\s]*\\)\\s*\\{[^\\}]+\\}");
380 QRegExp funcRe(QLatin1String("function\\s+") + QRegExp::escape(name) + argsAndBody);
381 if (funcRe.indexIn(js) != -1) {
382 func = funcRe.cap(0);
384 // try var foo = function(bar) { };
385 funcRe = QRegExp(QLatin1String("var\\s+") + QRegExp::escape(name) +
386 QLatin1String("\\s*=\\s*function") + argsAndBody);
387 if (funcRe.indexIn(js) != -1) {
388 func = funcRe.cap(0);
390 // try ,gr= function(bar) { };
391 funcRe = QRegExp(QLatin1String("[,\\s;}\\.\\)](") + QRegExp::escape(name) +
392 QLatin1String("\\s*=\\s*function") + argsAndBody + ")");
393 if (funcRe.indexIn(js) != -1) {
// Group 1 leaves out the leading delimiter character.
394 func = funcRe.cap(1);
396 qWarning() << "Cannot capture function" << name;
401 sigFunctions.insert(name, func);
403 // capture inner functions
404 static const QRegExp invokedFuncRe(QLatin1String("[\\s=;\\(]([") + jsNameChars +
405 QLatin1String("]+)\\s*\\([") + jsNameChars +
406 QLatin1String(",\\s]+\\)"));
// Start scanning past the declaration header.
// NOTE(review): 9 presumably accounts for the length of "function " — confirm.
407 int pos = name.length() + 9;
// NOTE(review): invokedFuncRe is static and the recursive call below runs indexIn
// on the same object, so matchedLength() afterwards may describe the recursion's
// last search rather than this match — confirm.
408 while ((pos = invokedFuncRe.indexIn(func, pos)) != -1) {
409 QString funcName = invokedFuncRe.cap(1);
410 if (!sigFunctions.contains(funcName)) captureFunction(funcName, js);
411 pos += invokedFuncRe.matchedLength();
414 // capture referenced objects
415 static const QRegExp objRe(QLatin1String("[\\s=;\\(]([") + jsNameChars +
416 QLatin1String("]+)\\.[") + jsNameChars + QLatin1String("]+"));
417 pos = name.length() + 9;
418 while ((pos = objRe.indexIn(func, pos)) != -1) {
419 QString objName = objRe.cap(1);
420 if (!sigObjects.contains(objName)) captureObject(objName, js);
421 pos += objRe.matchedLength();
// Extracts the declaration "var <name> = { ... };" from `js` and stores the whole
// matched text in sigObjects under `name`. Logs a warning when it is not found.
// NOTE(review): ".*" is greedy unless minimal matching is enabled on the regex —
// confirm the match ends at the intended closing brace.
425 void YTVideo::captureObject(const QString &name, const QString &js) {
426 QRegExp re(QLatin1String("var\\s+") + QRegExp::escape(name) +
427 QLatin1String("\\s*=\\s*\\{.*\\}\\s*;"));
429 if (re.indexIn(js) == -1) {
430 qWarning() << "Cannot capture object" << name;
433 QString obj = re.cap(0);
434 sigObjects.insert(name, obj);
// Runs the captured JavaScript signature function on `s` and returns its result
// as a string. Returns an empty string when no signature function name is known
// or when evaluation is flagged as an error.
// The captured objects are evaluated first, then the captured functions, then the
// call itself. A second engine that evaluates the whole player script is used for
// a retry. NOTE(review): presumably only when the first attempt failed — confirm.
437 QString YTVideo::decryptSignature(const QString &s) {
438 qDebug() << "decryptSignature" << sigFuncName << sigFunctions << sigObjects;
439 if (sigFuncName.isEmpty()) return QString();
// Define the helper objects before the functions that use them.
441 for (const QString &f : sigObjects) {
442 QJSValue value = engine.evaluate(f);
443 if (value.isError()) qWarning() << "Error in" << f << value.toString();
445 for (const QString &f : sigFunctions) {
446 QJSValue value = engine.evaluate(f);
447 if (value.isError()) qWarning() << "Error in" << f << value.toString();
// NOTE(review): `s` is placed inside a single-quoted JS literal without escaping;
// a quote or backslash in `s` would change the evaluated script — confirm the
// input can never contain one.
449 QString js = sigFuncName + "('" + s + "');";
450 QJSValue value = engine.evaluate(js);
452 if (value.isUndefined()) {
453 qWarning() << "Undefined result for" << js;
456 if (value.isError()) {
457 qWarning() << "Error in" << js << value.toString();
// Second attempt: load the complete player script, then repeat the same call.
462 engine2.evaluate(jsPlayer);
463 value = engine2.evaluate(js);
464 if (value.isUndefined()) {
465 qWarning() << "Undefined result for" << js;
468 if (value.isError()) {
469 qWarning() << "Error in" << js << value.toString();
473 if (error) return QString();
474 return value.toString();
// Finalizes stream selection: records the chosen definition's code, looks up a
// separate audio stream when the definition carries no audio, clears the
// loadingStreamUrl flag and emits gotStreamUrl(url, audioUrl).
477 void YTVideo::saveDefinitionForUrl(const QString &url, const VideoDefinition &definition) {
478 qDebug() << "Selected video format" << definition.getCode() << definition.getName()
479 << definition.hasAudio();
481 definitionCode = definition.getCode();
// Video-only definition: look for an audio-only format in urlMap.
484 if (!definition.hasAudio()) {
485 qDebug() << "Finding audio format";
// Candidate audio format codes, tried in this order.
// NOTE(review): presumably the search stops at the first hit — confirm.
486 static const QVector<int> audioFormats({251, 171, 140});
487 for (int audioFormat : audioFormats) {
488 qDebug() << "Trying audio format" << audioFormat;
489 auto i = urlMap.constFind(audioFormat);
490 if (i != urlMap.constEnd()) {
491 qDebug() << "Found audio format" << i.value();
492 audioUrl = i.value();
// The load is finished; hand the result to listeners.
498 loadingStreamUrl = false;
499 emit gotStreamUrl(url, audioUrl);