Add better logging to the snapshot generator for timing purposes and make sure the PhantomJS script always exits after a maximum of 10 seconds.
This commit is contained in:
parent
29dc7fd079
commit
1c0c551d00
2 changed files with 45 additions and 23 deletions
|
@ -1,10 +1,22 @@
|
||||||
var system = require('system');
|
var system = require('system');
|
||||||
var url = system.args[1] || '';
|
var url = system.args[1] || '';
|
||||||
|
var count = 0;
|
||||||
|
|
||||||
if(url.length > 0) {
|
if(url.length > 0) {
|
||||||
var page = require('webpage').create();
|
var page = require('webpage').create();
|
||||||
page.open(url, function (status) {
|
page.open(url, function (status) {
|
||||||
|
try {
|
||||||
if (status == 'success') {
|
if (status == 'success') {
|
||||||
var delay, checker = (function() {
|
var delay;
|
||||||
|
var checker = (function() {
|
||||||
|
count++;
|
||||||
|
|
||||||
|
if (count > 100) {
|
||||||
|
console.log('Not Found');
|
||||||
|
phantom.exit();
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
var html = page.evaluate(function () {
|
var html = page.evaluate(function () {
|
||||||
var found = document.getElementsByTagName('html')[0].outerHTML || '';
|
var found = document.getElementsByTagName('html')[0].outerHTML || '';
|
||||||
if (window.__isLoading && !window.__isLoading()) {
|
if (window.__isLoading && !window.__isLoading()) {
|
||||||
|
@ -33,5 +45,11 @@ if(url.length > 0) {
|
||||||
console.log('Not Found');
|
console.log('Not Found');
|
||||||
phantom.exit();
|
phantom.exit();
|
||||||
}
|
}
|
||||||
});
|
} catch (e) {
|
||||||
|
console.log('Not Found');
|
||||||
|
phantom.exit();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
phantom.exit();
|
||||||
}
|
}
|
|
@ -3,12 +3,12 @@ import logging
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def render_snapshot(url):
|
def render_snapshot(url):
|
||||||
logger.info('Snapshotting url: %s' % url)
|
logger.info('Snapshotting url: %s' % url)
|
||||||
|
|
||||||
out_html = subprocess.check_output(['phantomjs', '--ignore-ssl-errors=yes',
|
out_html = subprocess.check_output(['phantomjs', '--ignore-ssl-errors=yes',
|
||||||
'util/phantomjs-runner.js', url])
|
'util/phantomjs-runner.js', url])
|
||||||
|
|
||||||
|
@ -16,9 +16,13 @@ def render_snapshot(url):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
# Remove script tags
|
# Remove script tags
|
||||||
|
logger.info('Removing script tags: %s' % url)
|
||||||
|
|
||||||
soup = BeautifulSoup(out_html.decode('utf8'))
|
soup = BeautifulSoup(out_html.decode('utf8'))
|
||||||
to_extract = soup.findAll('script')
|
to_extract = soup.findAll('script')
|
||||||
for item in to_extract:
|
for item in to_extract:
|
||||||
item.extract()
|
item.extract()
|
||||||
|
|
||||||
|
logger.info('Snapshotted url: %s' % url)
|
||||||
|
|
||||||
return str(soup)
|
return str(soup)
|
||||||
|
|
Reference in a new issue