"""Render static HTML snapshots of pages by driving PhantomJS."""

import subprocess
import logging

from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)


def render_snapshot(url):
    """Render *url* with PhantomJS and return its HTML without script tags.

    The page is rendered by running ``phantomjs`` on the
    ``util/phantomjs-runner.js`` script (a path relative to the current
    working directory) with *url* as its argument, and capturing stdout.

    Args:
        url: Address of the page to snapshot; passed verbatim to the runner.

    Returns:
        The rendered markup as a string with every ``<script>`` element
        removed, or ``None`` when PhantomJS printed nothing or printed only
        ``Not Found`` (ignoring surrounding whitespace).

    Raises:
        subprocess.CalledProcessError: If ``phantomjs`` exits non-zero.
        OSError: If the ``phantomjs`` executable cannot be launched.
        UnicodeDecodeError: If the captured output is not valid UTF-8.
    """
    logger.info('Snapshotting url: %s', url)
    out_html = subprocess.check_output([
        'phantomjs',
        '--ignore-ssl-errors=yes',
        '--disk-cache=yes',
        'util/phantomjs-runner.js',
        url,
    ])

    # check_output returns bytes, so the sentinel must be a bytes literal;
    # comparing against a text string never matches on Python 3.
    if not out_html or out_html.strip() == b'Not Found':
        return None

    # Remove script tags so the snapshot is inert, static markup.
    logger.info('Removing script tags: %s', url)
    # NOTE(review): no parser is named, so bs4 picks whichever is installed;
    # pin one (e.g. 'html.parser') if output must be stable across machines.
    soup = BeautifulSoup(out_html.decode('utf8'))
    for script_tag in soup.find_all('script'):
        script_tag.extract()

    logger.info('Snapshotted url: %s', url)
    return str(soup)