[IMP] test: improve HttpCase opener
Simplify the website crawler by using the generic HttpCase opener
This commit is contained in:
parent
f9e24e1918
commit
78e044b5c7
|
@ -1,5 +1,5 @@
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
import test_converter
|
import test_converter
|
||||||
import test_requests
|
import test_crawl
|
||||||
import test_ui
|
import test_ui
|
||||||
import test_views
|
import test_views
|
||||||
|
|
|
@ -1,35 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
import unittest2
|
|
||||||
|
|
||||||
class URLCase(unittest2.TestCase):
|
|
||||||
"""
|
|
||||||
URLCase moved out of test_requests, otherwise discovery attempts to
|
|
||||||
instantiate and run it
|
|
||||||
"""
|
|
||||||
def __init__(self, user, url, source, result):
|
|
||||||
super(URLCase, self).__init__()
|
|
||||||
self.user = user
|
|
||||||
self.url = url
|
|
||||||
self.source = source
|
|
||||||
self.result = result
|
|
||||||
|
|
||||||
@property
|
|
||||||
def username(self):
|
|
||||||
return self.user or "Anonymous Coward"
|
|
||||||
|
|
||||||
def __str__(self):
|
|
||||||
if self.source:
|
|
||||||
return "%s (from %s, as %s)" % (self.url, self.source, self.username)
|
|
||||||
return "%s (as %s)" % (self.url, self.username)
|
|
||||||
|
|
||||||
__repr__ = __str__
|
|
||||||
|
|
||||||
def shortDescription(self):
|
|
||||||
return ""
|
|
||||||
|
|
||||||
def runTest(self):
|
|
||||||
code = self.result.getcode()
|
|
||||||
self.assertIn(
|
|
||||||
code, xrange(200, 300),
|
|
||||||
"Fetching %s as %s returned an error response (%d)" % (
|
|
||||||
self.url, self.username, code))
|
|
|
@ -0,0 +1,84 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
import logging
|
||||||
|
import urlparse
|
||||||
|
import unittest2
|
||||||
|
import urllib2
|
||||||
|
import time
|
||||||
|
import werkzeug.urls
|
||||||
|
|
||||||
|
import lxml.html
|
||||||
|
|
||||||
|
import openerp
|
||||||
|
from openerp import tools
|
||||||
|
|
||||||
|
_logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class Crawler(openerp.tests.HttpCase):
|
||||||
|
""" Test suite crawling an openerp CMS instance and checking that all
|
||||||
|
internal links lead to a 200 response.
|
||||||
|
|
||||||
|
If a username and a password are provided, authenticates the user before
|
||||||
|
starting the crawl
|
||||||
|
"""
|
||||||
|
|
||||||
|
at_install = False
|
||||||
|
post_install = True
|
||||||
|
|
||||||
|
def crawl(self, url, seen=None, msg=''):
|
||||||
|
if seen == None:
|
||||||
|
seen = set()
|
||||||
|
if url in seen:
|
||||||
|
return seen
|
||||||
|
else:
|
||||||
|
seen.add(url)
|
||||||
|
|
||||||
|
_logger.info("%s %s", msg, url)
|
||||||
|
r = self.url_open(url)
|
||||||
|
code = r.getcode()
|
||||||
|
self.assertIn( code, xrange(200, 300), "%s Fetching %s returned error response (%d)" % (msg, url, code))
|
||||||
|
|
||||||
|
if r.info().gettype() == 'text/html':
|
||||||
|
doc = lxml.html.fromstring(r.read())
|
||||||
|
for link in doc.xpath('//a[@href]'):
|
||||||
|
href = link.get('href')
|
||||||
|
|
||||||
|
parts = urlparse.urlsplit(href)
|
||||||
|
# href with any fragment removed
|
||||||
|
href = urlparse.urlunsplit((
|
||||||
|
parts.scheme,
|
||||||
|
parts.netloc,
|
||||||
|
parts.path,
|
||||||
|
parts.query,
|
||||||
|
''
|
||||||
|
))
|
||||||
|
|
||||||
|
# FIXME: handle relative link (not parts.path.startswith /)
|
||||||
|
if parts.netloc or \
|
||||||
|
not parts.path.startswith('/') or \
|
||||||
|
parts.path == '/web' or\
|
||||||
|
parts.path.startswith('/web/') or \
|
||||||
|
parts.path.startswith('/en_US/') or \
|
||||||
|
(parts.scheme and parts.scheme not in ('http', 'https')):
|
||||||
|
continue
|
||||||
|
|
||||||
|
self.crawl(href, seen, msg)
|
||||||
|
return seen
|
||||||
|
|
||||||
|
|
||||||
|
def test_10_crawl_public(self):
|
||||||
|
t0 = time.time()
|
||||||
|
seen = self.crawl('/', msg='Anonymous Coward')
|
||||||
|
_logger.log(25, "public crawled %s urls in %.2fs", len(seen) ,time.time() - t0)
|
||||||
|
|
||||||
|
def test_20_crawl_demo(self):
|
||||||
|
t0 = time.time()
|
||||||
|
self.authenticate('demo', 'demo')
|
||||||
|
seen = self.crawl('/', msg='demo')
|
||||||
|
_logger.log(25, "demo crawled %s urls in %.2fs", len(seen), time.time() - t0)
|
||||||
|
|
||||||
|
def test_30_crawl_admin(self):
|
||||||
|
t0 = time.time()
|
||||||
|
self.authenticate('admin', 'admin')
|
||||||
|
seen = self.crawl('/', msg='admin')
|
||||||
|
_logger.log(25, "admin crawled %s urls in %.2fs", len(seen), time.time() - t0)
|
||||||
|
|
|
@ -1,155 +0,0 @@
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
import urlparse
|
|
||||||
import unittest2
|
|
||||||
import urllib2
|
|
||||||
import werkzeug.urls
|
|
||||||
|
|
||||||
import lxml.html
|
|
||||||
|
|
||||||
import openerp
|
|
||||||
from openerp import tools
|
|
||||||
|
|
||||||
import cases
|
|
||||||
|
|
||||||
__all__ = ['load_tests', 'CrawlSuite']
|
|
||||||
|
|
||||||
class RedirectHandler(urllib2.HTTPRedirectHandler):
|
|
||||||
"""
|
|
||||||
HTTPRedirectHandler is predicated upon HTTPErrorProcessor being used and
|
|
||||||
works by intercepting 3xy "errors".
|
|
||||||
|
|
||||||
Inherit from it to handle 3xy non-error responses instead, as we're not
|
|
||||||
using the error processor
|
|
||||||
"""
|
|
||||||
|
|
||||||
def http_response(self, request, response):
|
|
||||||
code, msg, hdrs = response.code, response.msg, response.info()
|
|
||||||
|
|
||||||
if 300 <= code < 400:
|
|
||||||
return self.parent.error(
|
|
||||||
'http', request, response, code, msg, hdrs)
|
|
||||||
|
|
||||||
return response
|
|
||||||
|
|
||||||
https_response = http_response
|
|
||||||
|
|
||||||
class CrawlSuite(unittest2.TestSuite):
|
|
||||||
""" Test suite crawling an openerp CMS instance and checking that all
|
|
||||||
internal links lead to a 200 response.
|
|
||||||
|
|
||||||
If a username and a password are provided, authenticates the user before
|
|
||||||
starting the crawl
|
|
||||||
"""
|
|
||||||
|
|
||||||
at_install = False
|
|
||||||
post_install = True
|
|
||||||
|
|
||||||
def __init__(self, user=None, password=None):
|
|
||||||
super(CrawlSuite, self).__init__()
|
|
||||||
|
|
||||||
registry = openerp.registry(tools.config['db_name'])
|
|
||||||
try:
|
|
||||||
# switch registry to test mode, so that requests can be made
|
|
||||||
registry.enter_test_mode()
|
|
||||||
|
|
||||||
self.opener = urllib2.OpenerDirector()
|
|
||||||
self.opener.add_handler(urllib2.UnknownHandler())
|
|
||||||
self.opener.add_handler(urllib2.HTTPHandler())
|
|
||||||
self.opener.add_handler(urllib2.HTTPSHandler())
|
|
||||||
self.opener.add_handler(urllib2.HTTPCookieProcessor())
|
|
||||||
self.opener.add_handler(RedirectHandler())
|
|
||||||
|
|
||||||
self._authenticate(user, password)
|
|
||||||
self.user = user
|
|
||||||
|
|
||||||
finally:
|
|
||||||
registry.leave_test_mode()
|
|
||||||
|
|
||||||
def _request(self, path):
|
|
||||||
return self.opener.open(urlparse.urlunsplit([
|
|
||||||
'http', 'localhost:%s' % tools.config['xmlrpc_port'],
|
|
||||||
path, '', ''
|
|
||||||
]))
|
|
||||||
|
|
||||||
def _authenticate(self, user, password):
|
|
||||||
# force tools.config['db_name'] in user session so opening `/` doesn't
|
|
||||||
# blow up in multidb situations
|
|
||||||
self.opener.open('http://localhost:{port}/web/?db={db}'.format(
|
|
||||||
port=tools.config['xmlrpc_port'],
|
|
||||||
db=werkzeug.urls.url_quote_plus(tools.config['db_name']),
|
|
||||||
))
|
|
||||||
if user is not None:
|
|
||||||
url = 'http://localhost:{port}/login?{query}'.format(
|
|
||||||
port=tools.config['xmlrpc_port'],
|
|
||||||
query=werkzeug.urls.url_encode({
|
|
||||||
'db': tools.config['db_name'],
|
|
||||||
'login': user,
|
|
||||||
'key': password,
|
|
||||||
})
|
|
||||||
)
|
|
||||||
auth = self.opener.open(url)
|
|
||||||
assert auth.getcode() < 400, "Auth failure %d" % auth.getcode()
|
|
||||||
|
|
||||||
def _wrapped_run(self, result, debug=False):
|
|
||||||
registry = openerp.registry(tools.config['db_name'])
|
|
||||||
try:
|
|
||||||
# switch registry to test mode, so that requests can be made
|
|
||||||
registry.enter_test_mode()
|
|
||||||
|
|
||||||
paths = [URL('/')]
|
|
||||||
seen = set(paths)
|
|
||||||
|
|
||||||
while paths:
|
|
||||||
url = paths.pop(0)
|
|
||||||
r = self._request(url.url)
|
|
||||||
url.to_case(self.user, r).run(result)
|
|
||||||
|
|
||||||
if r.info().gettype() != 'text/html':
|
|
||||||
continue
|
|
||||||
|
|
||||||
doc = lxml.html.fromstring(r.read())
|
|
||||||
for link in doc.xpath('//a[@href]'):
|
|
||||||
href = link.get('href')
|
|
||||||
|
|
||||||
parts = urlparse.urlsplit(href)
|
|
||||||
# href with any fragment removed
|
|
||||||
href = urlparse.urlunsplit((
|
|
||||||
parts.scheme,
|
|
||||||
parts.netloc,
|
|
||||||
parts.path,
|
|
||||||
parts.query,
|
|
||||||
''
|
|
||||||
))
|
|
||||||
|
|
||||||
# avoid repeats, even for links we won't crawl no need to
|
|
||||||
# bother splitting them if we've already ignored them
|
|
||||||
# previously
|
|
||||||
if href in seen: continue
|
|
||||||
seen.add(href)
|
|
||||||
|
|
||||||
# FIXME: handle relative link (not parts.path.startswith /)
|
|
||||||
if parts.netloc or \
|
|
||||||
not parts.path.startswith('/') or \
|
|
||||||
parts.path == '/web' or\
|
|
||||||
parts.path.startswith('/web/') or \
|
|
||||||
(parts.scheme and parts.scheme not in ('http', 'https')):
|
|
||||||
continue
|
|
||||||
|
|
||||||
paths.append(URL(href, url.url))
|
|
||||||
|
|
||||||
finally:
|
|
||||||
registry.leave_test_mode()
|
|
||||||
|
|
||||||
class URL(object):
|
|
||||||
def __init__(self, url, source=None):
|
|
||||||
self.url = url
|
|
||||||
self.source = source
|
|
||||||
|
|
||||||
def to_case(self, user, result):
|
|
||||||
return cases.URLCase(user, self.url, self.source, result)
|
|
||||||
|
|
||||||
def load_tests(loader, base, _):
|
|
||||||
base.addTest(CrawlSuite())
|
|
||||||
base.addTest(CrawlSuite('admin', 'admin'))
|
|
||||||
base.addTest(CrawlSuite('demo', 'demo'))
|
|
||||||
return base
|
|
|
@ -231,12 +231,12 @@ def load_module_graph(cr, graph, status=None, perform_checks=True, skip_modules=
|
||||||
registry._init_modules.add(package.name)
|
registry._init_modules.add(package.name)
|
||||||
cr.commit()
|
cr.commit()
|
||||||
|
|
||||||
_logger.log(25, "%s modules loaded in %.2fs", len(graph), time.time() - ta0)
|
_logger.log(25, "%s modules loaded in %.2fs", len(graph), time.time() - ta0)
|
||||||
|
|
||||||
# The query won't be valid for models created later (i.e. custom model
|
# The query won't be valid for models created later (i.e. custom model
|
||||||
# created after the registry has been loaded), so empty its result.
|
# created after the registry has been loaded), so empty its result.
|
||||||
registry.fields_by_model = None
|
registry.fields_by_model = None
|
||||||
|
|
||||||
cr.commit()
|
cr.commit()
|
||||||
|
|
||||||
return loaded_modules, processed_modules
|
return loaded_modules, processed_modules
|
||||||
|
|
|
@ -428,7 +428,8 @@ def run_unit_tests(module_name, dbname, position=runs_at_install):
|
||||||
tm0 = time.time()
|
tm0 = time.time()
|
||||||
_logger.info('%s running tests.', m.__name__)
|
_logger.info('%s running tests.', m.__name__)
|
||||||
result = unittest2.TextTestRunner(verbosity=2, stream=TestStream(m.__name__)).run(suite)
|
result = unittest2.TextTestRunner(verbosity=2, stream=TestStream(m.__name__)).run(suite)
|
||||||
_logger.log(25, "%s tested in %.2fs", m.__name__, time.time() - tm0)
|
if time.time() - tm0 > 5:
|
||||||
|
_logger.log(25, "%s tested in %.2fs", m.__name__, time.time() - tm0)
|
||||||
if not result.wasSuccessful():
|
if not result.wasSuccessful():
|
||||||
r = False
|
r = False
|
||||||
_logger.error("Module %s: %d failures, %d errors", module_name, len(result.failures), len(result.errors))
|
_logger.error("Module %s: %d failures, %d errors", module_name, len(result.failures), len(result.errors))
|
||||||
|
|
|
@ -130,6 +130,25 @@ class SingleTransactionCase(BaseCase):
|
||||||
cls.cr.rollback()
|
cls.cr.rollback()
|
||||||
cls.cr.close()
|
cls.cr.close()
|
||||||
|
|
||||||
|
class RedirectHandler(urllib2.HTTPRedirectHandler):
|
||||||
|
"""
|
||||||
|
HTTPRedirectHandler is predicated upon HTTPErrorProcessor being used and
|
||||||
|
works by intercepting 3xy "errors".
|
||||||
|
|
||||||
|
Inherit from it to handle 3xy non-error responses instead, as we're not
|
||||||
|
using the error processor
|
||||||
|
"""
|
||||||
|
|
||||||
|
def http_response(self, request, response):
|
||||||
|
code, msg, hdrs = response.code, response.msg, response.info()
|
||||||
|
|
||||||
|
if 300 <= code < 400:
|
||||||
|
return self.parent.error(
|
||||||
|
'http', request, response, code, msg, hdrs)
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
|
https_response = http_response
|
||||||
|
|
||||||
class HttpCase(TransactionCase):
|
class HttpCase(TransactionCase):
|
||||||
""" Transactionnal HTTP TestCase with url_open and phantomjs helpers.
|
""" Transactionnal HTTP TestCase with url_open and phantomjs helpers.
|
||||||
|
@ -152,6 +171,14 @@ class HttpCase(TransactionCase):
|
||||||
self.session.db = DB
|
self.session.db = DB
|
||||||
openerp.http.root.session_store.save(self.session)
|
openerp.http.root.session_store.save(self.session)
|
||||||
self.localstorage_path = mkdtemp()
|
self.localstorage_path = mkdtemp()
|
||||||
|
# setup an url opener helper
|
||||||
|
self.opener = urllib2.OpenerDirector()
|
||||||
|
self.opener.add_handler(urllib2.UnknownHandler())
|
||||||
|
self.opener.add_handler(urllib2.HTTPHandler())
|
||||||
|
self.opener.add_handler(urllib2.HTTPSHandler())
|
||||||
|
self.opener.add_handler(urllib2.HTTPCookieProcessor())
|
||||||
|
self.opener.add_handler(RedirectHandler())
|
||||||
|
self.opener.addheaders.append(('Cookie', 'session_id=%s' % self.session_id))
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
rmtree(self.localstorage_path)
|
rmtree(self.localstorage_path)
|
||||||
|
@ -159,11 +186,15 @@ class HttpCase(TransactionCase):
|
||||||
super(HttpCase, self).tearDown()
|
super(HttpCase, self).tearDown()
|
||||||
|
|
||||||
def url_open(self, url, data=None, timeout=10):
|
def url_open(self, url, data=None, timeout=10):
|
||||||
opener = urllib2.build_opener()
|
|
||||||
opener.addheaders.append(('Cookie', 'session_id=%s' % self.session_id))
|
|
||||||
if url.startswith('/'):
|
if url.startswith('/'):
|
||||||
url = "http://localhost:%s%s" % (PORT, url)
|
url = "http://localhost:%s%s" % (PORT, url)
|
||||||
return opener.open(url, data, timeout)
|
return self.opener.open(url, data, timeout)
|
||||||
|
|
||||||
|
def authenticate(self, user, password):
|
||||||
|
if user is not None:
|
||||||
|
url = '/login?%s' % werkzeug.urls.url_encode({'db': DB,'login': user, 'key': password})
|
||||||
|
auth = self.url_open(url)
|
||||||
|
assert auth.getcode() < 400, "Auth failure %d" % auth.getcode()
|
||||||
|
|
||||||
def phantom_poll(self, phantom, timeout):
|
def phantom_poll(self, phantom, timeout):
|
||||||
""" Phantomjs Test protocol.
|
""" Phantomjs Test protocol.
|
||||||
|
|
Loading…
Reference in New Issue