First stab at trying to pre-render content for search crawlers.

2013-10-10 20:53:14 -04:00 · 2013-10-10 20:53:14 -04:00 · 785995b473
commit 785995b473
parent ce81431cd3
6 changed files with 60 additions and 4 deletions
--- a/seo-snapshots/make_snapshot.py
+++ b/seo-snapshots/make_snapshot.py
@@ -0,0 +1,29 @@
+import subprocess
+import urllib
+
+from BeautifulSoup import BeautifulSoup
+
+
+BASE_URL = 'http://localhost:5000'  # prepended to every URL path below
+OUTPUT_PATH = 'snapshots/'  # string-prefixed to each output filename
+
+URLS = [
+  ('/', 'index.html')  # (URL path, snapshot filename)
+]
+
+for url, output in URLS:
+  final_url = BASE_URL + url
+
+  out_html = subprocess.check_output(['phantomjs', 'phantomjs-runner.js',
+                                      final_url])  # stdout of the phantomjs run
+
+  # Drop every <script> element from the parsed page before saving it
+  soup = BeautifulSoup(out_html)
+  to_extract = soup.findAll('script')
+  for item in to_extract:
+      item.extract()
+
+  to_write = OUTPUT_PATH + output
+
+  with open(to_write, 'w') as output_file:
+    output_file.write(soup.prettify())