First stab at trying to pre-render content for search crawlers.
commit 785995b473
parent ce81431cd3
6 changed files with 60 additions and 4 deletions
seo-snapshots/make_snapshot.py (new file, 29 lines)

@@ -0,0 +1,29 @@
import subprocess
import urllib

from BeautifulSoup import BeautifulSoup


BASE_URL = 'http://localhost:5000'
OUTPUT_PATH = 'snapshots/'

URLS = [
    ('/', 'index.html')
]

for url, output in URLS:
    final_url = BASE_URL + url

    out_html = subprocess.check_output(['phantomjs', 'phantomjs-runner.js',
                                        final_url])

    # Remove script tags
    soup = BeautifulSoup(out_html)
    to_extract = soup.findAll('script')
    for item in to_extract:
        item.extract()

    to_write = OUTPUT_PATH + output

    with open(to_write, 'w') as output_file:
        output_file.write(soup.prettify())