Add better logging to the snapshot generator for timing purposes, and make sure the PhantomJS script always exits after a maximum of 10 seconds.
This commit is contained in:
parent
29dc7fd079
commit
1c0c551d00
2 changed files with 45 additions and 23 deletions
|
@@ -3,12 +3,12 @@ import logging
|
|||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def render_snapshot(url):
|
||||
logger.info('Snapshotting url: %s' % url)
|
||||
|
||||
out_html = subprocess.check_output(['phantomjs', '--ignore-ssl-errors=yes',
|
||||
'util/phantomjs-runner.js', url])
|
||||
|
||||
|
@@ -16,9 +16,13 @@ def render_snapshot(url):
|
|||
return None
|
||||
|
||||
# Remove script tags
|
||||
logger.info('Removing script tags: %s' % url)
|
||||
|
||||
soup = BeautifulSoup(out_html.decode('utf8'))
|
||||
to_extract = soup.findAll('script')
|
||||
for item in to_extract:
|
||||
item.extract()
|
||||
|
||||
logger.info('Snapshotted url: %s' % url)
|
||||
|
||||
return str(soup)
|
||||
|
|
Reference in a new issue