Merge branch 'MDL-52214-master-crawler' of git://github.com/mudrd8mz/moodle

This commit is contained in:
David Monllao 2015-11-23 17:00:24 +08:00
commit 57d4e11334
4 changed files with 62 additions and 4 deletions

View file

@ -215,7 +215,7 @@ class core_useragent {
* @return bool * @return bool
*/ */
protected function is_useragent_web_crawler() { protected function is_useragent_web_crawler() {
$regex = '/Googlebot|google\.com|Yahoo! Slurp|\[ZSEBOT\]|msnbot|bingbot|BingPreview|Yandex|AltaVista|Baiduspider|Teoma/'; $regex = '/Googlebot|google\.com|Yahoo! Slurp|\[ZSEBOT\]|msnbot|bingbot|BingPreview|Yandex|AltaVista|Baiduspider|Teoma/i';
return (preg_match($regex, $this->useragent)); return (preg_match($regex, $this->useragent));
} }

View file

@ -2395,8 +2395,8 @@ function get_referer($stripquery = true) {
* @deprecated since Moodle 3.0 use \core_useragent::is_web_crawler instead. * @deprecated since Moodle 3.0 use \core_useragent::is_web_crawler instead.
*/ */
function is_web_crawler() { function is_web_crawler() {
debugging("is_web_crawler() has been deprecated, please use \\core_useragent\\is_web_crawler() instead.", DEBUG_DEVELOPER); debugging('is_web_crawler() has been deprecated, please use core_useragent::is_web_crawler() instead.', DEBUG_DEVELOPER);
return core_useragent::is_crawler(); return core_useragent::is_web_crawler();
} }
/** /**

View file

@ -29,7 +29,7 @@
* @copyright 2013 Sam Hemelryk * @copyright 2013 Sam Hemelryk
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
*/ */
class core_useragent_testcase extends basic_testcase { class core_useragent_testcase extends advanced_testcase {
/** /**
* Restores the user agent to the default one. * Restores the user agent to the default one.
@ -1865,4 +1865,61 @@ class core_useragent_testcase extends basic_testcase {
$expectation = isset($tests['is_web_crawler']) ? $tests['is_web_crawler'] : false; $expectation = isset($tests['is_web_crawler']) ? $tests['is_web_crawler'] : false;
$this->assertSame($expectation, core_useragent::is_web_crawler()); $this->assertSame($expectation, core_useragent::is_web_crawler());
} }
/**
 * Regression tests for the deprecated is_web_crawler() function.
 *
 * Each sample user agent string is handed to core_useragent, after which the
 * deprecated global is_web_crawler() is called. Both its return value and the
 * developer-level debugging notice it raises are verified.
 */
public function test_deprecated_is_web_crawler() {
    // The notice is_web_crawler() is expected to raise on every single call.
    $deprecationnotice = 'is_web_crawler() has been deprecated, please use core_useragent::is_web_crawler() instead.';

    // Ordinary browsers: is_web_crawler() must return false for these.
    $regularagents = array(
        'Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US))',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:18.0) Gecko/18.0 Firefox/18.0',
        'Mozilla/5.0 (Macintosh; U; PPC Mac OS X; en) AppleWebKit/412 (KHTML, like Gecko) Safari/412',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.10 (KHTML, like Gecko) Chrome/8.0.552.215 Safari/534.10',
        'Opera/9.0 (Windows NT 5.1; U; en)',
        'Mozilla/5.0 (Linux; U; Android 2.1; en-us; Nexus One Build/ERD62) AppleWebKit/530.17 (KHTML, like Gecko) Version/4.0 Mobile Safari/530.17 Nexus',
        'Mozilla/5.0 (iPad; U; CPU OS 4_2_1 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8C148 Safari/6533.18.5',
    );

    // Search engine robots: is_web_crawler() must return true for these.
    $crawleragents = array(
        // Google.
        'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
        'Googlebot/2.1 (+http://www.googlebot.com/bot.html)',
        'Googlebot-Image/1.0',
        // Yahoo.
        'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)',
        // Bing.
        'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
        'Mozilla/5.0 (compatible; bingbot/2.0 +http://www.bing.com/bingbot.htm)',
        // MSN.
        'msnbot/2.1',
        // Yandex.
        'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)',
        'Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)',
        // AltaVista.
        'AltaVista V2.0B crawler@evreka.com',
        // ZoomSpider.
        'ZoomSpider - wrensoft.com [ZSEBOT]',
        // Baidu.
        'Baiduspider+(+http://www.baidu.com/search/spider_jp.html)',
        'Baiduspider+(+http://www.baidu.com/search/spider.htm)',
        'BaiDuSpider',
        // Ask.com.
        'User-Agent: Mozilla/2.0 (compatible; Ask Jeeves/Teoma)',
    );

    // Map every user agent string to the expected outcome. The union keeps the
    // browsers ahead of the crawlers, so they are checked first.
    $expectations = array_fill_keys($regularagents, false) + array_fill_keys($crawleragents, true);

    foreach ($expectations as $useragent => $iscrawler) {
        // Point core_useragent at the string under test and confirm it took effect.
        core_useragent::instance(true, $useragent);
        $this->assertSame($useragent, core_useragent::get_user_agent_string());

        if ($iscrawler) {
            $this->assertTrue(is_web_crawler(), "$useragent should be considered a search engine");
        } else {
            $this->assertFalse(is_web_crawler());
        }

        // Each call to the deprecated function must emit the developer notice.
        $this->assertDebuggingCalled($deprecationnotice, DEBUG_DEVELOPER);
    }
}
} }

View file

@ -151,6 +151,7 @@ information provided here is intended especially for developers.
line interface scripts. line interface scripts.
* External function core_course_external::get_course_contents returned parameter "name" has been changed to PARAM_RAW, * External function core_course_external::get_course_contents returned parameter "name" has been changed to PARAM_RAW,
this is because the new external_format_string function may return raw data if the global moodlewssettingraw parameter is used. this is because the new external_format_string function may return raw data if the global moodlewssettingraw parameter is used.
* Function is_web_crawler() has been deprecated, please use core_useragent::is_web_crawler() instead.
=== 2.9.1 === === 2.9.1 ===