import subprocess
import urllib
import os
import logging
import codecs

from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)


def renderSnapshot(path):
    """Render a page of the local app with PhantomJS and return script-free HTML.

    The URL ``http://localhost:5000/<path>`` is handed to the
    ``util/phantomjs-runner.js`` script via the ``phantomjs`` executable, and
    whatever that process writes to stdout is treated as the rendered HTML.

    Args:
        path: Path portion of the URL, appended verbatim after
            ``http://localhost:5000/``.

    Returns:
        The prettified HTML with every ``<script>`` element removed, or
        ``None`` when PhantomJS produced no output or its output (ignoring
        surrounding whitespace) is exactly ``Not Found``.

    Raises:
        subprocess.CalledProcessError: If ``phantomjs`` exits with a non-zero
            status.
        OSError: If the ``phantomjs`` executable cannot be launched.
    """
    final_url = 'http://localhost:5000/' + path
    logger.info('Snapshotting url: %s -> %s', path, final_url)

    out_html = subprocess.check_output([
        'phantomjs',
        '--ignore-ssl-errors=yes',
        'util/phantomjs-runner.js',
        final_url,
    ])

    # check_output returns bytes on Python 3 (and str, which is bytes, on
    # Python 2), so the sentinel must be a bytes literal; comparing against a
    # text literal would never match on Python 3.
    if not out_html or out_html.strip() == b'Not Found':
        return None

    # Remove script tags so the snapshot is static markup only.
    # NOTE(review): no parser is named, so bs4 picks the best one installed;
    # consider pinning one explicitly once the deployed parser is confirmed.
    soup = BeautifulSoup(out_html)
    for script_tag in soup.find_all('script'):
        script_tag.extract()

    return soup.prettify()