28 lines
664 B
Python
28 lines
664 B
Python
import subprocess
|
|
import logging
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def render_snapshot(url):
    """Render *url* with PhantomJS and return its HTML without script tags.

    Runs ``phantomjs --ignore-ssl-errors=yes util/phantomjs-runner.js <url>``
    and parses whatever the process writes to stdout.

    Args:
        url: Address of the page to snapshot; passed as a single argv entry
            to the PhantomJS runner script (no shell is involved).

    Returns:
        The serialized document (``str(soup)``) with every ``<script>``
        element removed, or ``None`` when the runner printed nothing or
        printed only ``Not Found``.

    Raises:
        subprocess.CalledProcessError: If the phantomjs process exits with a
            non-zero status (behaviour of ``subprocess.check_output``).
    """
    logger.info('Snapshotting url: %s', url)

    out_html = subprocess.check_output([
        'phantomjs',
        '--ignore-ssl-errors=yes',
        'util/phantomjs-runner.js',
        url,
    ])

    # check_output returns bytes, so the sentinel must be a bytes literal:
    # on Python 3 a bytes == str comparison is always False and the
    # "Not Found" page would be snapshotted. b'...' is a plain str on
    # Python 2, so the check keeps working there too.
    if not out_html or out_html.strip() == b'Not Found':
        return None

    # Remove script tags
    logger.info('Removing script tags: %s', url)

    # NOTE(review): no parser is named, so bs4 picks the best one installed
    # and the output can differ between environments -- consider pinning one.
    soup = BeautifulSoup(out_html.decode('utf8'))
    for script_tag in soup.find_all('script'):
        script_tag.extract()

    logger.info('Snapshotted url: %s', url)

    return str(soup)
|