Skip to content

Commit 494d000

Browse files
author
Giorgio Premi
committed
Follow meta-refresh redirect
1 parent 5c2477f commit 494d000

File tree

1 file changed

+98
-2
lines changed

1 file changed

+98
-2
lines changed

stream.js

Lines changed: 98 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,13 +59,106 @@ page.viewportSize = {
5959
page.customHeaders = opts.headers || {};
6060
page.zoomFactor = opts.scale;
6161

62-
page.open(opts.url, function (status) {
62+
var previousCount = 0;
63+
var previousPage = null;
64+
65+
function pageOnLoadFinished(status) {
6366
if (status === 'fail') {
6467
console.error('Couldn\'t load url: ' + opts.url);
6568
phantom.exit(1);
6669
return;
6770
}
6871

72+
if (previousPage === page.url) {
73+
if (++ previousCount > 3) {
74+
console.error('Loop detected: ' + page.url);
75+
phantom.exit(1);
76+
return;
77+
}
78+
} else {
79+
previousPage = page.url;
80+
previousCount = 0;
81+
}
82+
83+
var threshold = 10;
84+
var refresh = page.evaluate(function(threshold) {
85+
// Matches the `content` attribute of a <meta http-equiv="refresh"> tag:
// a run of digits, optionally followed by ";", an optional "url=" prefix
// and the target. Group 1 is the delay, group 2 (optional) the raw target.
var patternMetaContent = /^\s*(\d+)(?:\s*;(?:\s*url\s*=)?\s*(.+)?)?$/i;

/**
 * Parse the `content` value of a meta-refresh tag.
 * Base code from https://github.com/stevenvachon/http-equiv-refresh
 *
 * @param {*} content Raw attribute value. It is handed to RegExp#exec,
 *     which coerces non-strings (e.g. `undefined`) to a string.
 * @returns {{timeout: ?number, url: ?string}} `timeout` is the parsed delay,
 *     or null when `content` does not match the pattern. `url` is the
 *     trimmed target with one level of surrounding quotes removed, or null
 *     when no non-empty target is present.
 */
var parseMetaRefresh = function (content) {
    var result = { timeout: null, url: null };

    // Keep the match in its own variable instead of overwriting the parameter.
    var match = patternMetaContent.exec(content);

    if (match === null) {
        return result;
    }

    // Group 1 is mandatory in the pattern, so it is always set here.
    // Explicit radix: a delay such as "010" must always be read as decimal.
    result.timeout = parseInt(match[1], 10);

    if (match[2] !== undefined) {
        var url = String(match[2]).trim();
        var firstChar = url.charAt(0);
        var lastChar = url.charAt(url.length - 1);
        var isQuoted = url.length > 0 &&
            (firstChar === "'" || firstChar === '"') &&
            firstChar === lastChar;

        // Remove a single level of encapsulating quotes. A value that
        // consists of nothing but quotes (`''`, `""`) is an empty target,
        // not a URL made of quote characters.
        if (isQuoted) {
            url = url.length > 2 ? url.slice(1, -1).trim() : '';
        }

        if (url.length) {
            result.url = url;
        }
    }

    return result;
};
122+
123+
var metas = [];
124+
var tags = document.head.querySelectorAll('[http-equiv="refresh"]');
125+
for (var i = 0, len = tags.length; i < len; ++ i) {
126+
if (tags[i].tagName === 'META') {
127+
metas.push(tags[i].content || tags[i].CONTENT);
128+
}
129+
};
130+
131+
var refresh = null;
132+
var minTime = Number.POSITIVE_INFINITY;
133+
var i = 0;
134+
135+
for (var i = 0, len = metas.length; i < len; ++ i) {
136+
var currRefresh = parseMetaRefresh(metas[i]);
137+
138+
if (currRefresh.timeout <= threshold && currRefresh.timeout < minTime) {
139+
minTime = currRefresh.timeout;
140+
refreshUrl = currRefresh; // currRefresh.url could be null
141+
}
142+
}
143+
144+
return refreshUrl;
145+
}, threshold);
146+
147+
if (null !== refresh) {
148+
page.onLoadFinished = pageOnLoadFinished;
149+
150+
if (refresh.timeout > 0) { // when is 0, is already triggered by phantom apparently
151+
page.evaluate(function (refresh) {
152+
if (null === refresh.url) {
153+
window.location.reload();
154+
} else {
155+
window.location.replace(refresh.url);
156+
}
157+
}, refresh);
158+
}
159+
return;
160+
}
161+
69162
if (opts.crop) {
70163
page.clipRect = {
71164
top: 0,
@@ -124,4 +217,7 @@ page.open(opts.url, function (status) {
124217
log.call(console, page.renderBase64(opts.format));
125218
phantom.exit();
126219
}, opts.delay * 1000);
127-
});
220+
}
221+
222+
page.onLoadFinished = pageOnLoadFinished;
223+
page.open(opts.url);

0 commit comments

Comments
 (0)