29 lines
610 B
Python
29 lines
610 B
Python
|
import os
import subprocess
import urllib

from BeautifulSoup import BeautifulSoup
|
||
|
|
||
|
|
||
|
# Scheme, host and port that every URL path below is appended to.
BASE_URL = 'http://localhost:5000'
# Prefix prepended to each snapshot file name when it is written out.
OUTPUT_PATH = 'snapshots/'

# (URL path, snapshot file name) pairs, one per page to capture.
URLS = [
    ('/', 'index.html'),
]
|
||
|
|
||
|
# Render every configured page through PhantomJS, strip its <script> tags and
# store the prettified markup under OUTPUT_PATH.
for url, output in URLS:
    final_url = BASE_URL + url

    # The stdout of phantomjs-runner.js is taken to be the rendered HTML.
    # check_output raises CalledProcessError if phantomjs exits non-zero.
    out_html = subprocess.check_output(['phantomjs', 'phantomjs-runner.js',
                                        final_url])

    # Remove script tags
    soup = BeautifulSoup(out_html)
    for script_tag in soup.findAll('script'):
        script_tag.extract()

    # Plain concatenation is kept on purpose: OUTPUT_PATH carries its own
    # trailing separator.
    to_write = OUTPUT_PATH + output

    # Make sure the destination directory exists; otherwise open() below
    # fails with IOError after the (slow) render has already been done.
    target_dir = os.path.dirname(to_write)
    if target_dir and not os.path.isdir(target_dir):
        os.makedirs(target_dir)

    with open(to_write, 'w') as output_file:
        output_file.write(soup.prettify())
|