From 738973cf3957c30285aca4b6e42d2b45673ff27e Mon Sep 17 00:00:00 2001
From: Joseph Schorr
Date: Mon, 18 Nov 2013 17:11:06 -0500
Subject: [PATCH] Add the snapshot endpoint to web.py and have the phantomjs running only load the page's HTML once there are no further pending XHR requests

---
 endpoints/web.py         | 13 +++++++++
 static/js/app.js         | 16 ++++++++++-
 util/phantomjs-runner.js | 63 ++++++++++++++++++++++++++++++++++++++++
 util/seo.py              | 38 ++++++++++++++++++++++++
 4 files changed, 129 insertions(+), 1 deletion(-)
 create mode 100644 util/phantomjs-runner.js
 create mode 100644 util/seo.py

diff --git a/endpoints/web.py b/endpoints/web.py
index ff6f1790a..c76d2f60b 100644
--- a/endpoints/web.py
+++ b/endpoints/web.py
@@ -11,6 +11,7 @@ from data import model
 from app import app, login_manager, mixpanel
 from auth.permissions import QuayDeferredPermissionUser, AdministerOrganizationPermission
 from util.invoice import renderInvoiceToPdf
+from util.seo import renderSnapshot
 
 logger = logging.getLogger(__name__)
 
@@ -49,6 +50,18 @@ def index(path):
   return render_template('index.html')
 
 
+@app.route('/snapshot', methods=['GET'])
+@app.route('/snapshot/', methods=['GET'])
+@app.route('/snapshot/<path:path>', methods=['GET'])
+def snapshot(path = ''):
+  """ Serves the rendered HTML snapshot of the page at the given path, or a 404. """
+  result = renderSnapshot(path)
+  if result:
+    return result
+
+  abort(404)
+
+
 @app.route('/plans/')
 def plans():
   return index('')
diff --git a/static/js/app.js b/static/js/app.js
index 26d7aa5da..13b63a5b6 100644
--- a/static/js/app.js
+++ b/static/js/app.js
@@ -1053,7 +1053,8 @@ quayApp.directive('ngBlur', function() {
   };
 });
 
-quayApp.run(['$location', '$rootScope', 'Restangular', 'UserService', function($location, $rootScope, Restangular, UserService) {
+quayApp.run(['$location', '$rootScope', 'Restangular', 'UserService', '$http',
+            function($location, $rootScope, Restangular, UserService, $http) {
   Restangular.setErrorInterceptor(function(response) {
     if (response.status == 401) {
       $('#sessionexpiredModal').modal({});
@@ -1068,4 +1069,17 @@ quayApp.run(['$location', '$rootScope', 'Restangular', 'UserService', function($
       $rootScope.title = current.$$route.title;
     }
   });
+
+  // Polled by util/phantomjs-runner.js to decide when the page has finished rendering.
+  // The first call always reports "loading" so that at least one more poll happens
+  // before the page is captured; after that, the page counts as loading while any
+  // $http request is still pending.
+  var initiallyChecked = false;
+  window.__isLoading = function() {
+    if (!initiallyChecked) {
+      initiallyChecked = true;
+      return true;
+    }
+    return $http.pendingRequests.length > 0;
+  };
 }]);
diff --git a/util/phantomjs-runner.js b/util/phantomjs-runner.js
new file mode 100644
index 000000000..30b0439fa
--- /dev/null
+++ b/util/phantomjs-runner.js
@@ -0,0 +1,63 @@
+// Usage: phantomjs phantomjs-runner.js URL
+//
+// Loads URL, polls until the page reports that it has finished loading (through the
+// window.__isLoading hook defined in static/js/app.js) and prints the page's HTML to
+// stdout. Prints 'Not Found' if no URL was given, the page could not be opened or it is
+// a 404 page. Exits with status 1 if the page is still loading after MAX_WAIT_MS.
+var POLL_INTERVAL_MS = 100;
+var MAX_WAIT_MS = 10000;
+
+var system = require('system');
+var url = system.args[1] || '';
+
+// Reports a missing page to the caller (util/seo.py checks for this exact string).
+function notFound() {
+  console.log('Not Found');
+  phantom.exit();
+}
+
+if (url.length > 0) {
+  var page = require('webpage').create();
+  page.open(url, function (status) {
+    if (status != 'success') {
+      notFound();
+      return;
+    }
+
+    var started = Date.now();
+    var timer = setInterval(function() {
+      var html = page.evaluate(function () {
+        var found = document.getElementsByTagName('html')[0].outerHTML || '';
+        if (window.__isLoading && !window.__isLoading()) {
+          return found;
+        }
+        if (found.indexOf('404 Not Found') > 0) {
+          return found;
+        }
+        return null;
+      });
+
+      if (!html) {
+        // Still loading. Give up eventually, since the caller blocks on this process;
+        // a non-zero exit makes the failure visible instead of looking like a 404.
+        if (Date.now() - started > MAX_WAIT_MS) {
+          clearInterval(timer);
+          phantom.exit(1);
+        }
+        return;
+      }
+
+      clearInterval(timer);
+      if (html.indexOf('404 Not Found') > 0) {
+        notFound();
+        return;
+      }
+
+      console.log(html);
+      phantom.exit();
+    }, POLL_INTERVAL_MS);
+  });
+} else {
+  // Without a URL there is nothing to render; exit instead of leaving PhantomJS idle.
+  notFound();
+}
diff --git a/util/seo.py b/util/seo.py
new file mode 100644
index 000000000..6958e2c67
--- /dev/null
+++ b/util/seo.py
@@ -0,0 +1,38 @@
+import subprocess
+import urllib
+import os
+import logging
+import codecs
+
+from bs4 import BeautifulSoup
+
+logger = logging.getLogger(__name__)
+logging.basicConfig(level=logging.DEBUG)
+
+# Resolve the runner script next to this module, so that snapshotting does not depend on
+# the working directory of the server process.
+RUNNER_SCRIPT = os.path.join(os.path.dirname(os.path.abspath(__file__)),
+                             'phantomjs-runner.js')
+
+def renderSnapshot(path):
+  """ Renders the page at the given path of the local web server in PhantomJS and
+      returns its HTML with all script tags removed, or None if the page was
+      reported as not found.
+
+      Raises subprocess.CalledProcessError if PhantomJS exits with a non-zero status
+      and OSError if it cannot be started.
+  """
+  final_url = 'http://localhost:5000/' + path
+  logger.info('Snapshotting url: %s -> %s', path, final_url)
+  out_html = subprocess.check_output(['phantomjs', '--ignore-ssl-errors=yes',
+                                      RUNNER_SCRIPT, final_url])
+
+  if not out_html or out_html.strip() == 'Not Found':
+    return None
+
+  # Remove script tags
+  soup = BeautifulSoup(out_html)
+  for script_tag in soup.findAll('script'):
+    script_tag.extract()
+
+  return soup.prettify()