This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/util/seo.py
2014-05-19 13:18:37 -04:00

29 lines
722 B
Python

import subprocess
import logging
from bs4 import BeautifulSoup
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
def render_snapshot(url):
    """Render *url* with PhantomJS and return its HTML without script tags.

    Runs ``phantomjs`` on ``util/phantomjs-runner.js`` with the URL as its
    argument, parses the captured output with BeautifulSoup, removes every
    ``<script>`` element and returns the remaining document as a string.

    Args:
        url: The URL handed to the PhantomJS runner script.

    Returns:
        The rendered HTML with script tags removed, or ``None`` when the
        runner produced no output or printed only ``Not Found``.

    Raises:
        subprocess.CalledProcessError: If ``phantomjs`` exits non-zero.
        OSError: If the ``phantomjs`` executable cannot be started.
    """
    logger.info('Snapshotting url: %s', url)

    # NOTE(review): the runner script path is relative, so this presumably
    # relies on the process working directory being the repository root.
    out_html = subprocess.check_output([
        'phantomjs',
        '--ignore-ssl-errors=yes',
        '--disk-cache=yes',
        'util/phantomjs-runner.js',
        url,
    ])
    if not out_html:
        return None

    # check_output returns bytes; decode before comparing with the sentinel,
    # otherwise the bytes-vs-str comparison is always False on Python 3 and
    # the "Not Found" page would be returned as a snapshot.
    html_text = out_html.decode('utf8')
    if html_text.strip() == 'Not Found':
        return None

    # Remove script tags
    logger.info('Removing script tags: %s', url)
    # NOTE(review): no parser is named, so bs4 picks the best one installed;
    # output may differ between environments. Consider pinning one.
    soup = BeautifulSoup(html_text)
    for script_tag in soup.find_all('script'):
        script_tag.extract()

    logger.info('Snapshotted url: %s', url)
    return str(soup)