This repository has been archived on 2020-03-24. You can view files and clone it, but cannot push or open issues or pull requests.
quay/seo-snapshots/make_snapshot.py

45 lines
981 B
Python

import subprocess
import urllib
import os
import logging
import codecs
from bs4 import BeautifulSoup
# Configure the root logger at DEBUG so this script's INFO records are emitted.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Root of the site to snapshot; every entry of URLS is appended to it.
BASE_URL = 'https://localhost/'

# Directory that receives the generated index.html files. It is a relative
# path, so it resolves against the current working directory.
OUTPUT_PATH = '../static/snapshots/'

# Site-relative pages to snapshot; '' is the site root.
URLS = ['', 'guide/', 'plans/', 'repository/']
# For every configured page: render it with PhantomJS, strip all <script>
# tags from the rendered markup, and write the result to
# OUTPUT_PATH/<url>/index.html (creating the directory when needed).
for url in URLS:
    final_url = BASE_URL + url
    # os.path.join yields the same path as plain concatenation for the
    # configured values, but keeps working if OUTPUT_PATH lacks a trailing '/'.
    to_write = os.path.join(OUTPUT_PATH, url, 'index.html')

    # Pass the values as logging arguments so the message is only formatted
    # when the record is actually emitted.
    logger.info('Snapshotting url: %s -> %s', final_url, to_write)

    # 'phantomjs-runner.js' is a relative path, so the script has to be run
    # from the directory that contains it. check_output raises
    # CalledProcessError on a non-zero exit status, which aborts the run.
    # NOTE(review): --ignore-ssl-errors is presumably needed because the
    # localhost server uses a self-signed certificate -- confirm.
    out_html = subprocess.check_output([
        'phantomjs',
        '--ignore-ssl-errors=yes',
        'phantomjs-runner.js',
        final_url,
    ])

    # Name the parser explicitly: without it Beautiful Soup picks whichever
    # parser happens to be installed, so the generated markup could differ
    # between machines (and bs4 emits a warning). 'html.parser' ships with
    # Python, so no extra dependency is required.
    soup = BeautifulSoup(out_html, 'html.parser')

    # Remove script tags
    for script_tag in soup.find_all('script'):
        script_tag.extract()

    to_write_dir = os.path.dirname(to_write)
    if not os.path.exists(to_write_dir):
        os.makedirs(to_write_dir)

    # prettify() returns text; codecs.open encodes it as UTF-8 on write.
    with codecs.open(to_write, 'w', 'utf-8') as output_file:
        output_file.write(soup.prettify())