45 lines
981 B
Python
45 lines
981 B
Python
import subprocess
|
|
import urllib
|
|
import os
|
|
import logging
|
|
import codecs
|
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
|
|
# Module-level logger; basicConfig at DEBUG ensures the INFO progress
# messages logged for each snapshot are actually emitted.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.DEBUG)

# Root of the site to snapshot. Each entry of URLS is appended verbatim.
BASE_URL = 'https://localhost/'

# Directory that receives the snapshots. It is a relative path, so it is
# resolved against the working directory the script is started from.
OUTPUT_PATH = '../static/snapshots/'

# Site paths to snapshot. Every entry must be empty or end with '/',
# because the output file name is formed by appending 'index.html' to it.
URLS = [
    '',
    'guide/',
    'plans/',
    'repository/',
]
|
|
|
|
# Render every configured page with PhantomJS, strip its <script> tags and
# store the resulting static HTML as <OUTPUT_PATH>/<url>/index.html.
for url in URLS:
    final_url = BASE_URL + url
    to_write = OUTPUT_PATH + url + 'index.html'

    # Pass the values as logger arguments so formatting is deferred to the
    # logging framework; the emitted message text is unchanged.
    logger.info('Snapshotting url: %s -> %s', final_url, to_write)

    # Capture whatever phantomjs-runner.js prints to stdout for this URL
    # (presumably the rendered page markup -- verify against the runner
    # script). A non-zero exit status raises CalledProcessError and aborts
    # the run.
    out_html = subprocess.check_output(['phantomjs', '--ignore-ssl-errors=yes',
                                        'phantomjs-runner.js', final_url])

    # Remove script tags.
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed, so the output could differ from one
    # machine to another (and recent versions warn about the guess).
    soup = BeautifulSoup(out_html, 'html.parser')
    for script_tag in soup.find_all('script'):
        # The removed nodes are never reused, so destroy them outright.
        script_tag.decompose()

    # Create the target directory. Attempting the creation and tolerating
    # "already exists" avoids the race in a check-then-create sequence.
    to_write_dir = os.path.dirname(to_write)
    try:
        os.makedirs(to_write_dir)
    except OSError:
        if not os.path.isdir(to_write_dir):
            raise

    with codecs.open(to_write, 'w', 'utf-8') as output_file:
        output_file.write(soup.prettify())