mirror of
https://github.com/moodle/moodle.git
synced 2025-08-04 08:26:37 +02:00
MDL-38170 SimplePie: Cannot read https feeds through proxy
This commit is contained in:
parent
f29e62cb6c
commit
85cb4b65b1
3 changed files with 137 additions and 1 deletions
|
@ -3436,6 +3436,45 @@ class curl {
|
|||
public function get_errno() {
|
||||
return $this->errno;
|
||||
}
|
||||
|
||||
/**
|
||||
* When using a proxy, an additional HTTP response code may appear at
|
||||
* the start of the header. For example, when using https over a proxy
|
||||
* there may be 'HTTP/1.0 200 Connection Established'. Other codes are
|
||||
* also possible and some may come with their own headers.
|
||||
*
|
||||
* If using the return value containing all headers, this function can be
|
||||
* called to remove unwanted doubles.
|
||||
*
|
||||
* Note that it is not possible to distinguish this situation from valid
|
||||
* data unless you know the actual response part (below the headers)
|
||||
* will not be included in this string, or else will not 'look like' HTTP
|
||||
* headers. As a result it is not safe to call this function for general
|
||||
* data.
|
||||
*
|
||||
* @param string $input Input HTTP response
|
||||
* @return string HTTP response with additional headers stripped if any
|
||||
*/
|
||||
public static function strip_double_headers($input) {
|
||||
// I have tried to make this regular expression as specific as possible
|
||||
// to avoid any case where it does weird stuff if you happen to put
|
||||
// HTTP/1.1 200 at the start of any line in your RSS file. This should
|
||||
// also make it faster because it can abandon regex processing as soon
|
||||
// as it hits something that doesn't look like an http header. The
|
||||
// header definition is taken from RFC 822, except I didn't support
|
||||
// folding which is never used in practice.
|
||||
$crlf = "\r\n";
|
||||
return preg_replace(
|
||||
// HTTP version and status code (ignore value of code).
|
||||
'~^HTTP/1\..*' . $crlf .
|
||||
// Header name: character between 33 and 126 decimal, except colon.
|
||||
// Colon. Header value: any character except \r and \n. CRLF.
|
||||
'(?:[\x21-\x39\x3b-\x7e]+:[^' . $crlf . ']+' . $crlf . ')*' .
|
||||
// Headers are terminated by another CRLF (blank line).
|
||||
$crlf .
|
||||
// Second HTTP status code, this time must be 200.
|
||||
'(HTTP/1.[01] 200 )~', '$1', $input);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -139,7 +139,7 @@ class moodle_simplepie_file extends SimplePie_File
|
|||
}
|
||||
}
|
||||
|
||||
$this->headers = $curl->get($url);
|
||||
$this->headers = curl::strip_double_headers($curl->get($url));
|
||||
|
||||
if ($curl->error) {
|
||||
$this->error = 'cURL Error: '.$curl->error;
|
||||
|
|
|
@ -270,4 +270,101 @@ class filelib_testcase extends advanced_testcase {
|
|||
$this->assertEquals($contenthash, $fileref->get_contenthash());
|
||||
$this->assertEquals($filecontent, $fileref->get_content());
|
||||
}
|
||||
|
||||
/**
|
||||
* Tests the strip_double_headers function in the curl class.
|
||||
*/
|
||||
public function test_curl_strip_double_headers() {
|
||||
// Example from issue tracker.
|
||||
$mdl30648example = <<<EOF
|
||||
HTTP/1.0 407 Proxy Authentication Required
|
||||
Server: squid/2.7.STABLE9
|
||||
Date: Thu, 08 Dec 2011 14:44:33 GMT
|
||||
Content-Type: text/html
|
||||
Content-Length: 1275
|
||||
X-Squid-Error: ERR_CACHE_ACCESS_DENIED 0
|
||||
Proxy-Authenticate: Basic realm="Squid proxy-caching web server"
|
||||
X-Cache: MISS from homer.lancs.ac.uk
|
||||
X-Cache-Lookup: NONE from homer.lancs.ac.uk:3128
|
||||
Via: 1.0 homer.lancs.ac.uk:3128 (squid/2.7.STABLE9)
|
||||
Connection: close
|
||||
|
||||
HTTP/1.0 200 OK
|
||||
Server: Apache
|
||||
X-Lb-Nocache: true
|
||||
Cache-Control: private, max-age=15
|
||||
ETag: "4d69af5d8ba873ea9192c489e151bd7b"
|
||||
Content-Type: text/html
|
||||
Date: Thu, 08 Dec 2011 14:44:53 GMT
|
||||
Set-Cookie: BBC-UID=c4de2e109c8df6a51de627cee11b214bd4fb6054a030222488317afb31b343360MoodleBot/1.0; expires=Mon, 07-Dec-15 14:44:53 GMT; path=/; domain=bbc.co.uk
|
||||
X-Cache-Action: MISS
|
||||
X-Cache-Age: 0
|
||||
Vary: Cookie,X-Country,X-Ip-is-uk-combined,X-Ip-is-advertise-combined,X-Ip_is_uk_combined,X-Ip_is_advertise_combined, X-GeoIP
|
||||
X-Cache: MISS from ww
|
||||
|
||||
<html>...
|
||||
EOF;
|
||||
$mdl30648expected = <<<EOF
|
||||
HTTP/1.0 200 OK
|
||||
Server: Apache
|
||||
X-Lb-Nocache: true
|
||||
Cache-Control: private, max-age=15
|
||||
ETag: "4d69af5d8ba873ea9192c489e151bd7b"
|
||||
Content-Type: text/html
|
||||
Date: Thu, 08 Dec 2011 14:44:53 GMT
|
||||
Set-Cookie: BBC-UID=c4de2e109c8df6a51de627cee11b214bd4fb6054a030222488317afb31b343360MoodleBot/1.0; expires=Mon, 07-Dec-15 14:44:53 GMT; path=/; domain=bbc.co.uk
|
||||
X-Cache-Action: MISS
|
||||
X-Cache-Age: 0
|
||||
Vary: Cookie,X-Country,X-Ip-is-uk-combined,X-Ip-is-advertise-combined,X-Ip_is_uk_combined,X-Ip_is_advertise_combined, X-GeoIP
|
||||
X-Cache: MISS from ww
|
||||
|
||||
<html>...
|
||||
EOF;
|
||||
// For HTTP, replace the \n with \r\n.
|
||||
$mdl30648example = preg_replace("~(?!<\r)\n~", "\r\n", $mdl30648example);
|
||||
$mdl30648expected = preg_replace("~(?!<\r)\n~", "\r\n", $mdl30648expected);
|
||||
|
||||
// Test stripping works OK.
|
||||
$this->assertEquals($mdl30648expected, curl::strip_double_headers($mdl30648example));
|
||||
// Test it does nothing to the 'plain' data.
|
||||
$this->assertEquals($mdl30648expected, curl::strip_double_headers($mdl30648expected));
|
||||
|
||||
// Example from OU proxy.
|
||||
$httpsexample = <<<EOF
|
||||
HTTP/1.0 200 Connection established
|
||||
|
||||
HTTP/1.1 200 OK
|
||||
Date: Fri, 22 Feb 2013 17:14:23 GMT
|
||||
Server: Apache/2
|
||||
X-Powered-By: PHP/5.3.3-7+squeeze14
|
||||
Content-Type: text/xml
|
||||
Connection: close
|
||||
Content-Encoding: gzip
|
||||
Transfer-Encoding: chunked
|
||||
|
||||
<?xml version="1.0" encoding="ISO-8859-1" ?>
|
||||
<rss version="2.0">...
|
||||
EOF;
|
||||
$httpsexpected = <<<EOF
|
||||
HTTP/1.1 200 OK
|
||||
Date: Fri, 22 Feb 2013 17:14:23 GMT
|
||||
Server: Apache/2
|
||||
X-Powered-By: PHP/5.3.3-7+squeeze14
|
||||
Content-Type: text/xml
|
||||
Connection: close
|
||||
Content-Encoding: gzip
|
||||
Transfer-Encoding: chunked
|
||||
|
||||
<?xml version="1.0" encoding="ISO-8859-1" ?>
|
||||
<rss version="2.0">...
|
||||
EOF;
|
||||
// For HTTP, replace the \n with \r\n.
|
||||
$httpsexample = preg_replace("~(?!<\r)\n~", "\r\n", $httpsexample);
|
||||
$httpsexpected = preg_replace("~(?!<\r)\n~", "\r\n", $httpsexpected);
|
||||
|
||||
// Test stripping works OK.
|
||||
$this->assertEquals($httpsexpected, curl::strip_double_headers($httpsexample));
|
||||
// Test it does nothing to the 'plain' data.
|
||||
$this->assertEquals($httpsexpected, curl::strip_double_headers($httpsexpected));
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue