|
|
@ -22,7 +22,22 @@ var crypto = require('crypto'); |
|
|
|
var http = require('http'); |
|
|
|
var http = require('http'); |
|
|
|
var https = require('https'); |
|
|
|
var https = require('https'); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/**
 * Rewrites a Web Archive (web.archive.org) snapshot URL so that the raw
 * archived resource is requested instead of the wrapper page.
 *
 * Web Archive URLs need to be transformed to add `if_` after the ID.
 * Without this, an HTML page containing an iframe with the PDF file
 * will be served instead (issue 8920).
 *
 * @param {string} url - The URL to inspect.
 * @returns {string} The URL with `if_` appended to the numeric snapshot ID
 *   when it is a Web Archive snapshot URL; otherwise `url` unchanged.
 */
function rewriteWebArchiveUrl(url) {
  // No `g` flag: the pattern is anchored with `^` and matched exactly once,
  // and a global regex would make `exec` depend on its mutable `lastIndex`.
  var webArchiveRegex =
    /(^https?:\/\/web\.archive\.org\/web\/)(\d+)(\/https?:\/\/.+)/;
  var urlParts = webArchiveRegex.exec(url);
  if (!urlParts) {
    // Not a Web Archive snapshot URL (or already carries a modifier such as
    // `if_` after the digits, which the pattern does not match).
    return url;
  }
  // urlParts[1] = archive prefix, urlParts[2] = numeric snapshot ID,
  // urlParts[3] = "/" followed by the original resource URL.
  return urlParts[1] + (urlParts[2] + 'if_') + urlParts[3];
}
|
|
|
|
|
|
|
|
|
|
|
function downloadFile(file, url, callback, redirects) { |
|
|
|
function downloadFile(file, url, callback, redirects) { |
|
|
|
|
|
|
|
url = rewriteWebArchiveUrl(url); |
|
|
|
|
|
|
|
|
|
|
|
var completed = false; |
|
|
|
var completed = false; |
|
|
|
var protocol = /^https:\/\//.test(url) ? https : http; |
|
|
|
var protocol = /^https:\/\//.test(url) ? https : http; |
|
|
|
protocol.get(url, function (response) { |
|
|
|
protocol.get(url, function (response) { |
|
|
|