[IMP] website: speed up the crawl test by loading only one slug per route
If there are routes such as: /partner/p-1 /partner/p-2 ... /partner/grade-1/p-1 /partner/grade-1/p-2 ... /partner/grade-2/p-1 /partner/grade-2/p-2 ... we want to test each kind of route only once, i.e. only the routes: /partner/p-1 /partner/grade-1/p-1
This commit is contained in:
parent
7c751e9243
commit
9fbdd2d83d
|
@ -1,15 +1,12 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
import logging
|
||||
import urlparse
|
||||
import unittest2
|
||||
import urllib2
|
||||
import time
|
||||
import werkzeug.urls
|
||||
|
||||
import lxml.html
|
||||
|
||||
import openerp
|
||||
from openerp import tools
|
||||
import re
|
||||
|
||||
_logger = logging.getLogger(__name__)
|
||||
|
||||
|
@ -25,12 +22,15 @@ class Crawler(openerp.tests.HttpCase):
|
|||
post_install = True
|
||||
|
||||
def crawl(self, url, seen=None, msg=''):
|
||||
if seen == None:
|
||||
if seen == None:
|
||||
seen = set()
|
||||
if url in seen:
|
||||
|
||||
url_slug = re.sub(r"[/](([^/=?&]+-)?[0-9]+)([/]|$)", '/<slug>/', url)
|
||||
url_slug = re.sub(r"([^/=?&]+)=[^/=?&]+", '\g<1>=param', url_slug)
|
||||
if url_slug in seen:
|
||||
return seen
|
||||
else:
|
||||
seen.add(url)
|
||||
seen.add(url_slug)
|
||||
|
||||
_logger.info("%s %s", msg, url)
|
||||
r = self.url_open(url)
|
||||
|
|
Loading…
Reference in New Issue