First stab at trying to pre-render content for search crawlers.
This commit is contained in:
parent
ce81431cd3
commit
785995b473
6 changed files with 60 additions and 4 deletions
29
seo-snapshots/make_snapshot.py
Normal file
29
seo-snapshots/make_snapshot.py
Normal file
|
@ -0,0 +1,29 @@
|
|||
# Renders each page listed in URLS with PhantomJS, strips its <script> tags
# and writes the resulting HTML into OUTPUT_PATH.
import os
import subprocess
import urllib

from BeautifulSoup import BeautifulSoup


# Base address prepended to every URL path before it is handed to PhantomJS.
BASE_URL = 'http://localhost:5000'
# Directory the snapshot files are written into.
OUTPUT_PATH = 'snapshots/'

# (URL path, output file name) pairs to snapshot.
URLS = [
    ('/', 'index.html')
]

# open() below fails if the output directory is missing, so create it up
# front. The isdir check (rather than exist_ok=True) keeps this working on
# the Python 2 interpreter that the BeautifulSoup 3 import implies.
if not os.path.isdir(OUTPUT_PATH):
    os.makedirs(OUTPUT_PATH)

for url, output in URLS:
    final_url = BASE_URL + url

    # phantomjs-runner.js prints the rendered page's HTML on stdout;
    # check_output raises CalledProcessError if PhantomJS exits non-zero.
    out_html = subprocess.check_output(['phantomjs', 'phantomjs-runner.js',
                                        final_url])

    # Remove script tags
    soup = BeautifulSoup(out_html)
    for item in soup.findAll('script'):
        item.extract()

    to_write = os.path.join(OUTPUT_PATH, output)

    with open(to_write, 'w') as output_file:
        output_file.write(soup.prettify())
|
23
seo-snapshots/phantomjs-runner.js
Normal file
23
seo-snapshots/phantomjs-runner.js
Normal file
|
@ -0,0 +1,23 @@
|
|||
// PhantomJS runner: opens the URL passed as the first command-line argument,
// polls the page until its '.ready-indicator' element reports
// data-status="ready", then prints the page's <html> markup to stdout and
// exits. Exits with status 1 when no URL is given or the page fails to load.
var system = require('system');
var url = system.args[1] || '';

if (url.length > 0) {
    var page = require('webpage').create();

    page.open(url, function (status) {
        if (status !== 'success') {
            // PhantomJS keeps running until phantom.exit() is called, so a
            // failed load must exit explicitly or the caller blocks forever.
            console.error('Failed to load ' + url);
            phantom.exit(1);
            return;
        }

        var delay;
        var checker = function () {
            // Runs inside the page; yields undefined until the page is ready.
            var html = page.evaluate(function () {
                var ready = document.getElementsByClassName('ready-indicator')[0];
                // The indicator element may not exist yet; guard against
                // calling getAttribute on undefined and keep polling.
                if (ready && ready.getAttribute('data-status') == 'ready') {
                    return document.getElementsByTagName('html')[0].outerHTML;
                }
            });

            if (html) {
                // The timer was created with setInterval, so cancel it with
                // the matching clearInterval.
                clearInterval(delay);
                console.log(html);
                phantom.exit();
            }
        };

        // Re-check readiness every 100 ms.
        delay = setInterval(checker, 100);
    });
} else {
    // No URL supplied: exit instead of leaving the PhantomJS process idle.
    console.error('Usage: phantomjs phantomjs-runner.js <url>');
    phantom.exit(1);
}
|
Reference in a new issue