diff --git a/requirements.txt b/requirements.txt index de2ef9669..e637b3718 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,6 +7,7 @@ MarkupSafe==0.18 PyMySQL==0.5 Werkzeug==0.9.4 argparse==1.2.1 +beautifulsoup4==4.3.2 blinker==1.3 boto==2.13.3 distribute==0.6.34 diff --git a/seo-snapshots/make_snapshot.py b/seo-snapshots/make_snapshot.py index 21ec3f7ab..6ae5de64d 100644 --- a/seo-snapshots/make_snapshot.py +++ b/seo-snapshots/make_snapshot.py @@ -2,15 +2,16 @@ import subprocess import urllib import os import logging +import codecs -from BeautifulSoup import BeautifulSoup +from bs4 import BeautifulSoup logger = logging.getLogger(__name__) logging.basicConfig(level=logging.DEBUG) -BASE_URL = 'http://localhost:5000/' +BASE_URL = 'https://localhost/' OUTPUT_PATH = '../static/snapshots/' URLS = [ @@ -26,8 +27,8 @@ for url in URLS: logger.info('Snapshotting url: %s -> %s' % (final_url, to_write)) - out_html = subprocess.check_output(['phantomjs', 'phantomjs-runner.js', - final_url]) + out_html = subprocess.check_output(['phantomjs', '--ignore-ssl-errors=yes', + 'phantomjs-runner.js', final_url]) # Remove script tags soup = BeautifulSoup(out_html) @@ -40,5 +41,5 @@ for url in URLS: if not os.path.exists(to_write_dir): os.makedirs(to_write_dir) - with open(to_write, 'w') as output_file: - output_file.write(soup.prettify()) \ No newline at end of file + with codecs.open(to_write, 'w', 'utf-8') as output_file: + output_file.write(soup.prettify())