[FIX] website: seo, auto detection of keywords with accents in boundaries

JavaScript's regex \b is not Unicode-aware,
so words beginning or ending with an accented character won't match \b.

We therefore use a custom regex in place of \b.
Basically, it is a regex matching all word separators,
such as dots, commas, spaces, etc., as well as other Unicode
separators.

opw-641005
This commit is contained in:
Denis Ledoux 2015-05-28 15:13:01 +02:00
parent 970f9466aa
commit 9fe059027e
1 changed files with 7 additions and 3 deletions

View File

@ -6,6 +6,10 @@
website.seo = {};
// Pattern source used in place of \b when building keyword-matching regexes.
// JavaScript's \b is not Unicode-aware: accented letters (e.g. à, é) are not
// treated as word characters, so a word beginning or ending with one does not
// get a \b boundary at that edge.
// The pattern is a single capturing group matching either a run of one or more
// separator characters (code points U+2000-U+206F and U+2E00-U+2E7F, the
// punctuation marks listed explicitly in the class, ¿, ¡ and whitespace) or
// the start / end of the input.
// It is kept as a string (hence the doubled backslashes) so that it can be
// concatenated around a keyword before being handed to `new RegExp(...)`.
var WORD_SEPARATORS_REGEX = '([\\u2000-\\u206F\\u2E00-\\u2E7F\'!"#\\$%&\\(\\)\\*\\+,\\-\\.\\/:;<=>\\?¿¡@\\[\\]\\^_`\\{\\|\\}~\\s]+|^|$)';
function analyzeKeyword(htmlPage, keyword) {
return htmlPage.isInTitle(keyword) ? {
title: 'label label-primary',
@ -293,13 +297,13 @@
return $('body').children().not('.js_seo_configuration').text();
},
// Tests, case-insensitively, whether `text` occurs in this.bodyText().
// Returns the boolean result of RegExp.prototype.test.
// NOTE(review): two consecutive return statements appear here. This looks like
// a diff view with the +/- markers stripped, where the first (\b-based) return
// is the removed line and the second (WORD_SEPARATORS_REGEX-based) return is
// its replacement -- confirm against the actual file.
// NOTE(review): `text` is concatenated into the pattern unescaped, so any
// regex metacharacters it contains are interpreted as pattern syntax.
isInBody: function (text) {
return new RegExp("\\b"+text+"\\b", "gi").test(this.bodyText());
return new RegExp(WORD_SEPARATORS_REGEX+text+WORD_SEPARATORS_REGEX, "gi").test(this.bodyText());
},
// Tests, case-insensitively, whether `text` occurs in this.title().
// Returns the boolean result of RegExp.prototype.test.
// NOTE(review): two consecutive return statements appear here. This looks like
// a diff view with the +/- markers stripped, where the first (\b-based) return
// is the removed line and the second (WORD_SEPARATORS_REGEX-based) return is
// its replacement -- confirm against the actual file.
// NOTE(review): `text` is concatenated into the pattern unescaped, so any
// regex metacharacters it contains are interpreted as pattern syntax.
isInTitle: function (text) {
return new RegExp("\\b"+text+"\\b", "gi").test(this.title());
return new RegExp(WORD_SEPARATORS_REGEX+text+WORD_SEPARATORS_REGEX, "gi").test(this.title());
},
// Tests, case-insensitively, whether `text` occurs in this.description().
// Returns the boolean result of RegExp.prototype.test.
// NOTE(review): two consecutive return statements appear here. This looks like
// a diff view with the +/- markers stripped, where the first (\b-based) return
// is the removed line and the second (WORD_SEPARATORS_REGEX-based) return is
// its replacement -- confirm against the actual file.
// NOTE(review): `text` is concatenated into the pattern unescaped, so any
// regex metacharacters it contains are interpreted as pattern syntax.
isInDescription: function (text) {
return new RegExp("\\b"+text+"\\b", "gi").test(this.description());
return new RegExp(WORD_SEPARATORS_REGEX+text+WORD_SEPARATORS_REGEX, "gi").test(this.description());
},
});