pax_global_header 0000666 0000000 0000000 00000000064 12656251722 0014522 g ustar 00root root 0000000 0000000 52 comment=8f776343b0dada397c2a950a7c3f7be57442fa35
picoFeed-0.1.18/ 0000775 0000000 0000000 00000000000 12656251722 0013327 5 ustar 00root root 0000000 0000000 picoFeed-0.1.18/LICENSE 0000664 0000000 0000000 00000002073 12656251722 0014336 0 ustar 00root root 0000000 0000000 The MIT License (MIT)
Copyright (c) 2015 Frederic Guillot
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
picoFeed-0.1.18/lib/ 0000775 0000000 0000000 00000000000 12656251722 0014075 5 ustar 00root root 0000000 0000000 picoFeed-0.1.18/lib/PicoFeed/ 0000775 0000000 0000000 00000000000 12656251722 0015553 5 ustar 00root root 0000000 0000000 picoFeed-0.1.18/lib/PicoFeed/Client/ 0000775 0000000 0000000 00000000000 12656251722 0016771 5 ustar 00root root 0000000 0000000 picoFeed-0.1.18/lib/PicoFeed/Client/Client.php 0000664 0000000 0000000 00000033114 12656251722 0020722 0 ustar 00root root 0000000 0000000 request_headers = $headers;
}
/**
 * Run the HTTP request and dispatch the response to the status handlers.
 *
 * When a non-empty URL is given it replaces the URL stored on the client
 * before the request is made.
 *
 * @param string $url URL
 *
 * @return Client
 */
public function execute($url = '')
{
    if ($url !== '') {
        $this->url = $url;
    }

    $caller = get_called_class();
    $log_lines = array(
        ' Fetch URL: ' => $this->url,
        ' Etag provided: ' => $this->etag,
        ' Last-Modified provided: ' => $this->last_modified,
    );

    foreach ($log_lines as $label => $value) {
        Logger::setMessage($caller.$label.$value);
    }

    $response = $this->doRequest();
    $this->status_code = $response['status'];

    // Order matters: the 404 handler throws, the others only update state.
    $this->handleNotModifiedResponse($response);
    $this->handleNotFoundResponse($response);
    $this->handleNormalResponse($response);

    return $this;
}
/**
 * Update the "modified" flag (and the cache validators) from the response.
 *
 * A 304 marks the resource as not modified. A 200 compares the response
 * validators with the ones stored on the client, then stores the new ones.
 *
 * @param array $response Client response
 */
public function handleNotModifiedResponse(array $response)
{
    switch ($response['status']) {
        case 304:
            $this->is_modified = false;
            break;
        case 200:
            // Compare against the previous validators before overwriting them.
            $this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified);
            $this->etag = $this->getHeader($response, 'ETag');
            $this->last_modified = $this->getHeader($response, 'Last-Modified');
            break;
    }

    if ($this->is_modified === false) {
        Logger::setMessage(get_called_class().' Resource not modified');
    }
}
/**
 * Throw when the response status is 404.
 *
 * @param array $response Client response
 *
 * @throws InvalidUrlException
 */
public function handleNotFoundResponse(array $response)
{
    if ($response['status'] != 404) {
        return;
    }

    throw new InvalidUrlException('Resource not found');
}
/**
 * Store body, content type and charset of a 200 response.
 *
 * Any other status leaves the client state untouched.
 *
 * @param array $response Client response
 */
public function handleNormalResponse(array $response)
{
    if ($response['status'] != 200) {
        return;
    }

    $this->content = $response['body'];
    $this->content_type = $this->findContentType($response);
    // findCharset() reads $this->content_type, so it must run last.
    $this->encoding = $this->findCharset();
}
/**
 * Decide whether a response differs from the cached validators.
 *
 * Every cache header present in the response is compared with the stored
 * value: one mismatch means "modified". If no cache header is present at
 * all the response is also considered modified.
 *
 * @param array  $response
 * @param string $etag
 * @param string $lastModified
 *
 * @return bool
 */
private function hasBeenModified($response, $etag, $lastModified)
{
    $expected = array(
        'Etag' => $etag,
        'Last-Modified' => $lastModified,
    );

    $has_matching_header = false;

    foreach ($expected as $name => $stored) {
        if (!isset($response['headers'][$name])) {
            continue;
        }

        if ($response['headers'][$name] !== $stored) {
            return true;
        }

        $has_matching_header = true;
    }

    // Without any validator to compare we cannot prove the content is unchanged.
    return !$has_matching_header;
}
/**
 * Read the Content-Type response header, lower-cased.
 *
 * @param array $response Client response
 *
 * @return string
 */
public function findContentType(array $response)
{
    $content_type = $this->getHeader($response, 'Content-Type');

    return strtolower($content_type);
}
/**
 * Find charset from response headers.
 *
 * Extracts the value of the "charset=" parameter from the content type
 * stored on the client. Surrounding quotes and any parameter following the
 * charset (after a ";") are stripped, so `text/xml; charset="utf-8"; x=y`
 * yields `utf-8`.
 *
 * @return string Charset name, or an empty string when none is declared
 */
public function findCharset()
{
    $result = explode('charset=', $this->content_type);

    if (!isset($result[1])) {
        return '';
    }

    $charset = $result[1];

    // Drop any parameter that follows the charset.
    $separator = strpos($charset, ';');
    if ($separator !== false) {
        $charset = substr($charset, 0, $separator);
    }

    // The parameter value may be a quoted-string.
    return trim($charset, " \t\"'");
}
/**
 * Look up one header in a client response.
 *
 * @param array  $response Client response
 * @param string $header   Header name
 *
 * @return string Header value, or an empty string when the header is absent
 */
public function getHeader(array $response, $header)
{
    if (isset($response['headers'][$header])) {
        return $response['headers'][$header];
    }

    return '';
}
/**
* Set the Last-Modified HTTP header.
*
* The value is stored as-is on the client (no validation is done here).
*
* @param string $last_modified Header value
*
* @return \PicoFeed\Client\Client Current instance, for method chaining
*/
public function setLastModified($last_modified)
{
$this->last_modified = $last_modified;
return $this;
}
/**
* Get the value of the Last-Modified HTTP header.
*
* Returns whatever is currently stored in $this->last_modified.
*
* @return string
*/
public function getLastModified()
{
return $this->last_modified;
}
/**
* Set the value of the Etag HTTP header.
*
* The value is stored as-is on the client (no validation is done here).
*
* @param string $etag Etag HTTP header value
*
* @return \PicoFeed\Client\Client Current instance, for method chaining
*/
public function setEtag($etag)
{
$this->etag = $etag;
return $this;
}
/**
* Get the Etag HTTP header value.
*
* Returns whatever is currently stored in $this->etag.
*
* @return string
*/
public function getEtag()
{
return $this->etag;
}
/**
* Get the final url value.
*
* Returns whatever is currently stored in $this->url.
*
* @return string
*/
public function getUrl()
{
return $this->url;
}
/**
* Set the url.
*
* @param string $url URL to store on the client
*
* @return \PicoFeed\Client\Client Current instance, for method chaining
*/
public function setUrl($url)
{
$this->url = $url;
return $this;
}
/**
* Get the HTTP response status code.
*
* Returns whatever is currently stored in $this->status_code.
*
* @return int
*/
public function getStatusCode()
{
return $this->status_code;
}
/**
* Get the body of the HTTP response.
*
* Returns whatever is currently stored in $this->content.
*
* @return string
*/
public function getContent()
{
return $this->content;
}
/**
* Get the content type value from HTTP headers.
*
* Returns whatever is currently stored in $this->content_type.
*
* @return string
*/
public function getContentType()
{
return $this->content_type;
}
/**
* Get the encoding value from HTTP headers.
*
* Returns whatever is currently stored in $this->encoding.
*
* @return string
*/
public function getEncoding()
{
return $this->encoding;
}
/**
* Return true if the remote resource has changed.
*
* Returns the flag currently stored in $this->is_modified.
*
* @return bool
*/
public function isModified()
{
return $this->is_modified;
}
/**
* Return true if passthrough mode is enabled.
*
* Returns the flag currently stored in $this->passthrough.
*
* @return bool
*/
public function isPassthroughEnabled()
{
return $this->passthrough;
}
/**
 * Change the connection timeout.
 *
 * A falsy value (0, null, '') is ignored and the current timeout is kept.
 *
 * @param int $timeout Connection timeout
 *
 * @return \PicoFeed\Client\Client
 */
public function setTimeout($timeout)
{
    if ($timeout) {
        $this->timeout = $timeout;
    }

    return $this;
}
/**
 * Change the user agent.
 *
 * A falsy value is ignored and the current user agent is kept.
 *
 * @param string $user_agent User Agent
 *
 * @return \PicoFeed\Client\Client
 */
public function setUserAgent($user_agent)
{
    if ($user_agent) {
        $this->user_agent = $user_agent;
    }

    return $this;
}
/**
 * Change the maximum number of HTTP redirections.
 *
 * A falsy value is ignored and the current limit is kept.
 *
 * @param int $max Maximum
 *
 * @return \PicoFeed\Client\Client
 */
public function setMaxRedirections($max)
{
    if ($max) {
        $this->max_redirects = $max;
    }

    return $this;
}
/**
 * Change the maximum size of the HTTP body.
 *
 * A falsy value is ignored and the current limit is kept.
 *
 * @param int $max Maximum
 *
 * @return \PicoFeed\Client\Client
 */
public function setMaxBodySize($max)
{
    if ($max) {
        $this->max_body_size = $max;
    }

    return $this;
}
/**
 * Change the proxy hostname.
 *
 * A falsy value is ignored and the current hostname is kept.
 *
 * @param string $hostname Proxy hostname
 *
 * @return \PicoFeed\Client\Client
 */
public function setProxyHostname($hostname)
{
    if ($hostname) {
        $this->proxy_hostname = $hostname;
    }

    return $this;
}
/**
 * Change the proxy port.
 *
 * A falsy value is ignored and the current port is kept.
 *
 * @param int $port Proxy port
 *
 * @return \PicoFeed\Client\Client
 */
public function setProxyPort($port)
{
    if ($port) {
        $this->proxy_port = $port;
    }

    return $this;
}
/**
 * Change the proxy username.
 *
 * A falsy value is ignored and the current username is kept.
 *
 * @param string $username Proxy username
 *
 * @return \PicoFeed\Client\Client
 */
public function setProxyUsername($username)
{
    if ($username) {
        $this->proxy_username = $username;
    }

    return $this;
}
/**
 * Change the proxy password.
 *
 * A falsy value is ignored and the current password is kept.
 *
 * @param string $password Password
 *
 * @return \PicoFeed\Client\Client
 */
public function setProxyPassword($password)
{
    if ($password) {
        $this->proxy_password = $password;
    }

    return $this;
}
/**
 * Change the Basic Auth username.
 *
 * A falsy value is ignored and the current username is kept.
 *
 * @param string $username Basic Auth username
 *
 * @return \PicoFeed\Client\Client
 */
public function setUsername($username)
{
    if ($username) {
        $this->username = $username;
    }

    return $this;
}
/**
 * Change the Basic Auth password.
 *
 * A falsy value is ignored and the current password is kept.
 *
 * @param string $password Basic Auth Password
 *
 * @return \PicoFeed\Client\Client
 */
public function setPassword($password)
{
    if ($password) {
        $this->password = $password;
    }

    return $this;
}
/**
* Enable the passthrough mode.
*
* Sets $this->passthrough to true.
*
* @return \PicoFeed\Client\Client Current instance, for method chaining
*/
public function enablePassthroughMode()
{
$this->passthrough = true;
return $this;
}
/**
* Disable the passthrough mode.
*
* Sets $this->passthrough to false.
*
* @return \PicoFeed\Client\Client Current instance, for method chaining
*/
public function disablePassthroughMode()
{
$this->passthrough = false;
return $this;
}
/**
 * Copy the client-related settings of a config object onto this client.
 *
 * Each value goes through the matching setter, so empty config values do
 * not overwrite the current settings.
 *
 * @param \PicoFeed\Config\Config $config Config instance
 *
 * @return \PicoFeed\Client\Client
 */
public function setConfig(Config $config)
{
    if ($config === null) {
        return $this;
    }

    $this->setTimeout($config->getClientTimeout());
    $this->setUserAgent($config->getClientUserAgent());
    $this->setMaxRedirections($config->getMaxRedirections());
    $this->setMaxBodySize($config->getMaxBodySize());
    $this->setProxyHostname($config->getProxyHostname());
    $this->setProxyPort($config->getProxyPort());
    $this->setProxyUsername($config->getProxyUsername());
    $this->setProxyPassword($config->getProxyPassword());

    return $this;
}
/**
 * Return true if the HTTP status code is a redirection that carries a
 * Location to follow (301, 302, 303, 307 or 308).
 *
 * @access public
 *
 * @param int $code HTTP status code
 *
 * @return bool
 */
public function isRedirection($code)
{
    // 308 (Permanent Redirect) is handled like the other redirect codes.
    return in_array((int) $code, array(301, 302, 303, 307, 308), true);
}
}
picoFeed-0.1.18/lib/PicoFeed/Client/ClientException.php 0000664 0000000 0000000 00000000307 12656251722 0022577 0 ustar 00root root 0000000 0000000 body_length += $length;
if ($this->body_length > $this->max_body_size) {
return -1;
}
$this->body .= $buffer;
return $length;
}
/**
 * cURL callback collecting the response headers.
 *
 * Header lines are appended to the current header block. A blank line
 * closes the block, so the next lines start a new one.
 *
 * @param resource $ch     cURL handler
 * @param string   $buffer Header line
 *
 * @return int Length of the buffer
 */
public function readHeaders($ch, $buffer)
{
    $length = strlen($buffer);
    $is_blank_line = ($buffer === "\r\n" || $buffer === "\n");

    if ($is_blank_line) {
        ++$this->response_headers_count;

        return $length;
    }

    $block = $this->response_headers_count;

    if (!isset($this->response_headers[$block])) {
        $this->response_headers[$block] = '';
    }

    $this->response_headers[$block] .= $buffer;

    return $length;
}
/**
 * cURL callback to passthrough the HTTP body to the client.
 *
 * On the first chunk the upstream status code and Content-Type are
 * forwarded to the client, then every chunk is echoed as it arrives.
 *
 * If the function return -1, curl stop to read the HTTP response
 *
 * @param resource $ch     cURL handler
 * @param string   $buffer Chunk of data
 *
 * @return int Length of the buffer
 */
public function passthroughBody($ch, $buffer)
{
    // do it only at the beginning of a transmission
    if ($this->body_length === 0) {
        list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1]));

        if ($this->isRedirection($status)) {
            // NOTE(review): the value returned here is not the buffer length,
            // which presumably makes cURL abort this transfer - confirm.
            return $this->handleRedirection($headers['Location']);
        }

        // Forward the upstream status code. header($status) with a bare
        // integer is not a valid header line and does not set the status.
        if ($status > 0) {
            if (function_exists('http_response_code')) {
                http_response_code($status);
            } else {
                header(':', true, $status);
            }
        }

        if (isset($headers['Content-Type'])) {
            header('Content-Type:' .$headers['Content-Type']);
        }
    }

    $length = strlen($buffer);
    $this->body_length += $length;

    echo $buffer;

    return $length;
}
/**
 * Build the list of request headers.
 *
 * Conditional headers are only added when the matching validator is set.
 * Custom request headers are appended last.
 *
 * @return string[]
 */
private function prepareHeaders()
{
    $headers = array('Connection: close');

    $conditional = array(
        'If-None-Match: ' => $this->etag,
        'If-Modified-Since: ' => $this->last_modified,
    );

    foreach ($conditional as $prefix => $validator) {
        if ($validator) {
            $headers[] = $prefix.$validator;
        }
    }

    return array_merge($headers, $this->request_headers);
}
/**
 * Prepare curl proxy context.
 *
 * Does nothing when no proxy hostname is configured. Otherwise the proxy
 * host, port and (if a proxy username is set) credentials are applied to
 * the handle.
 *
 * @param resource $ch
 *
 * @return resource $ch
 */
private function prepareProxyContext($ch)
{
    if ($this->proxy_hostname) {
        Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port);

        curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxy_port);
        // CURLOPT_PROXYTYPE expects one of the CURLPROXY_* constants; the
        // string 'HTTP' only worked because it is cast to 0.
        curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
        curl_setopt($ch, CURLOPT_PROXY, $this->proxy_hostname);

        if ($this->proxy_username) {
            Logger::setMessage(get_called_class().' Proxy credentials: Yes');
            curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxy_username.':'.$this->proxy_password);
        } else {
            Logger::setMessage(get_called_class().' Proxy credentials: No');
        }
    }

    return $ch;
}
/**
 * Apply the Basic Auth credentials to the curl handle.
 *
 * Credentials are only set when both username and password are non-empty.
 *
 * @param resource $ch
 *
 * @return resource $ch
 */
private function prepareAuthContext($ch)
{
    $has_credentials = $this->username && $this->password;

    if (!$has_credentials) {
        return $ch;
    }

    curl_setopt($ch, CURLOPT_USERPWD, $this->username.':'.$this->password);

    return $ch;
}
/**
 * Register the body and header callbacks on the curl handle.
 *
 * The body goes to passthroughBody() in passthrough mode and to readBody()
 * otherwise. Headers always go to readHeaders().
 *
 * @param resource $ch
 *
 * @return resource $ch
 */
private function prepareDownloadMode($ch)
{
    $write_function = $this->isPassthroughEnabled() ? 'passthroughBody' : 'readBody';

    curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, $write_function));
    curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, 'readHeaders'));

    return $ch;
}
/**
 * Create and configure the curl handle for the current URL.
 *
 * @return resource
 */
private function prepareContext()
{
    $ch = curl_init();

    // Options are applied in declaration order.
    $options = array(
        CURLOPT_URL => $this->url,
        CURLOPT_HTTP_VERSION => CURL_HTTP_VERSION_1_1,
        CURLOPT_TIMEOUT => $this->timeout,
        CURLOPT_CONNECTTIMEOUT => $this->timeout,
        CURLOPT_USERAGENT => $this->user_agent,
        CURLOPT_HTTPHEADER => $this->prepareHeaders(),
        CURLOPT_FOLLOWLOCATION => false,
        CURLOPT_ENCODING => '',
        CURLOPT_COOKIEJAR => 'php://memory',
        CURLOPT_COOKIEFILE => 'php://memory',
    );

    foreach ($options as $option => $value) {
        curl_setopt($ch, $option, $value);
    }

    // Disable SSLv3 by enforcing TLSv1.x for curl >= 7.34.0 and < 7.39.0.
    // Versions prior to 7.34 and at least when compiled against openssl
    // interpret this parameter as "limit to TLSv1.0" which fails for sites
    // which enforce TLS 1.1+.
    // Starting with curl 7.39.0 SSLv3 is disabled by default.
    $curl_info = curl_version();
    $version_number = $curl_info['version_number'];

    if ($version_number >= 467456 && $version_number < 468736) {
        curl_setopt($ch, CURLOPT_SSLVERSION, 1);
    }

    $ch = $this->prepareDownloadMode($ch);
    $ch = $this->prepareProxyContext($ch);

    return $this->prepareAuthContext($ch);
}
/**
 * Run the curl request, log its timings and translate curl errors.
 *
 * On success the client URL is replaced by curl's effective URL.
 */
private function executeContext()
{
    $ch = $this->prepareContext();
    curl_exec($ch);

    $caller = get_called_class();
    $metrics = array(
        ' cURL total time: ' => CURLINFO_TOTAL_TIME,
        ' cURL dns lookup time: ' => CURLINFO_NAMELOOKUP_TIME,
        ' cURL connect time: ' => CURLINFO_CONNECT_TIME,
        ' cURL speed download: ' => CURLINFO_SPEED_DOWNLOAD,
        ' cURL effective url: ' => CURLINFO_EFFECTIVE_URL,
    );

    foreach ($metrics as $label => $info) {
        Logger::setMessage($caller.$label.curl_getinfo($ch, $info));
    }

    $curl_errno = curl_errno($ch);

    if ($curl_errno) {
        Logger::setMessage($caller.' cURL error: '.curl_error($ch));
        curl_close($ch);
        // handleError() throws for every error code.
        $this->handleError($curl_errno);
    }

    // Update the url if there were redirects
    $this->url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);

    curl_close($ch);
}
/**
 * Perform the request and return the parsed response.
 *
 * Only the last collected header block is parsed. A redirection status is
 * delegated to handleRedirection().
 *
 * @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
 */
public function doRequest()
{
    $this->executeContext();

    $last_block = $this->response_headers[$this->response_headers_count - 1];
    list($status, $headers) = HttpHeaders::parse(explode("\n", $last_block));

    if (!$this->isRedirection($status)) {
        return array(
            'status' => $status,
            'body' => $this->body,
            'headers' => $headers,
        );
    }

    return $this->handleRedirection($headers['Location']);
}
/**
 * Handle HTTP redirects
 *
 * Follows the redirect chain iteratively until a non-redirect response is
 * received. The requests are made here directly instead of going through
 * doRequest(): doRequest() calls this method again for every redirect,
 * which restarted the counter on each hop, so the limit was never reached
 * and a redirect loop recursed without bound.
 *
 * @param string $location Redirected URL
 *
 * @throws MaxRedirectException When the redirect limit is reached
 *
 * @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
 */
private function handleRedirection($location)
{
    $nb_redirects = 0;
    $result = array();
    $next_location = $location;

    while (true) {
        // Point the client at the new location and drop the previous response.
        $this->url = Url::resolve($next_location, $this->url);
        $this->body = '';
        $this->body_length = 0;
        $this->response_headers = array();
        $this->response_headers_count = 0;

        ++$nb_redirects;

        if ($nb_redirects >= $this->max_redirects) {
            throw new MaxRedirectException('Maximum number of redirections reached');
        }

        $this->executeContext();

        list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1]));

        if (!$this->isRedirection($status)) {
            $result = array(
                'status' => $status,
                'body' => $this->body,
                'headers' => $headers,
            );

            break;
        }

        $next_location = $headers['Location'];
    }

    return $result;
}
/**
 * Translate a cURL error code into the matching exception.
 *
 * Numeric codes are used instead of the CURLE_* constants because the
 * constants are not always available (it depends on the libcurl version
 * linked to php). Every code ends in an exception.
 *
 * @see http://curl.haxx.se/libcurl/c/libcurl-errors.html
 *
 * @param int $errno cURL error code
 */
private function handleError($errno)
{
    // 35 SSL_CONNECT_ERROR, 51 PEER_FAILED_VERIFICATION, 58 SSL_CERTPROBLEM,
    // 60 SSL_CACERT, 59 SSL_CIPHER, 64 USE_SSL_FAILED,
    // 66 SSL_ENGINE_INITFAILED, 77 SSL_CACERT_BADFILE, 83 SSL_ISSUER_ERROR
    $certificate_errors = array(35, 51, 58, 60, 59, 64, 66, 77, 83);

    if ($errno == 78) { // CURLE_REMOTE_FILE_NOT_FOUND
        throw new InvalidUrlException('Resource not found');
    }

    if ($errno == 6) { // CURLE_COULDNT_RESOLVE_HOST
        throw new InvalidUrlException('Unable to resolve hostname');
    }

    if ($errno == 7) { // CURLE_COULDNT_CONNECT
        throw new InvalidUrlException('Unable to connect to the remote host');
    }

    if ($errno == 23) { // CURLE_WRITE_ERROR
        throw new MaxSizeException('Maximum response size exceeded');
    }

    if ($errno == 28) { // CURLE_OPERATION_TIMEDOUT
        throw new TimeoutException('Operation timeout');
    }

    if (in_array($errno, $certificate_errors)) {
        throw new InvalidCertificateException('Invalid SSL certificate');
    }

    if ($errno == 47) { // CURLE_TOO_MANY_REDIRECTS
        throw new MaxRedirectException('Maximum number of redirections reached');
    }

    if ($errno == 63) { // CURLE_FILESIZE_EXCEEDED
        throw new MaxSizeException('Maximum response size exceeded');
    }

    throw new InvalidUrlException('Unable to fetch the URL');
}
}
picoFeed-0.1.18/lib/PicoFeed/Client/HttpHeaders.php 0000664 0000000 0000000 00000003462 12656251722 0021722 0 ustar 00root root 0000000 0000000 $value) {
$this->headers[strtolower($key)] = $value;
}
}
public function offsetGet($offset)
{
    // Header names are stored lower-cased, so lookups are case-insensitive.
    $key = strtolower($offset);

    return $this->headers[$key];
}
public function offsetSet($offset, $value)
{
    // Header names are stored lower-cased, so lookups are case-insensitive.
    $key = strtolower($offset);

    $this->headers[$key] = $value;
}
public function offsetExists($offset)
{
    // Header names are stored lower-cased, so lookups are case-insensitive.
    $key = strtolower($offset);

    return isset($this->headers[$key]);
}
public function offsetUnset($offset)
{
    // Header names are stored lower-cased, so lookups are case-insensitive.
    $key = strtolower($offset);

    unset($this->headers[$key]);
}
/**
 * Parse HTTP headers.
 *
 * Every status line resets the collected headers, so when the lines hold
 * several responses only the last one is returned.
 *
 * Compared to a naive split on ": " this parser:
 *  - accepts any "HTTP/x" status line (e.g. "HTTP/2 200"),
 *  - splits on the first colon only, so values containing ": " are kept whole,
 *  - accepts headers without a space after the colon,
 *  - keeps the value "0" (only empty values are skipped).
 *
 * @static
 *
 * @param array $lines List of headers
 *
 * @return array [int status code, HttpHeaders headers]
 */
public static function parse(array $lines)
{
    $status = 0;
    $headers = array();

    foreach ($lines as $line) {
        if (strpos($line, 'HTTP/') === 0) {
            // New response: forget the headers of the previous one.
            $headers = array();
            $status = preg_match('#^HTTP/\d+(?:\.\d+)?\s+(\d{3})#', $line, $matches) ? (int) $matches[1] : 0;
            continue;
        }

        $parts = explode(':', $line, 2);

        if (count($parts) !== 2) {
            continue;
        }

        $name = trim($parts[0]);
        $value = trim($parts[1]);

        if ($name !== '' && $value !== '') {
            $headers[$name] = $value;
        }
    }

    Logger::setMessage(get_called_class().' HTTP status code: '.$status);

    foreach ($headers as $name => $value) {
        Logger::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value);
    }

    return array($status, new self($headers));
}
}
picoFeed-0.1.18/lib/PicoFeed/Client/InvalidCertificateException.php 0000664 0000000 0000000 00000000263 12656251722 0025113 0 ustar 00root root 0000000 0000000 user_agent,
);
// disable compression in passthrough mode. It could result in double
// compressed content which isn't decodeable by browsers
if (function_exists('gzdecode') && !$this->isPassthroughEnabled()) {
$headers[] = 'Accept-Encoding: gzip';
}
if ($this->etag) {
$headers[] = 'If-None-Match: '.$this->etag;
}
if ($this->last_modified) {
$headers[] = 'If-Modified-Since: '.$this->last_modified;
}
if ($this->proxy_username) {
$headers[] = 'Proxy-Authorization: Basic '.base64_encode($this->proxy_username.':'.$this->proxy_password);
}
if ($this->username && $this->password) {
$headers[] = 'Authorization: Basic '.base64_encode($this->username.':'.$this->password);
}
$headers = array_merge($headers, $this->request_headers);
return $headers;
}
/**
 * Construct the final URL from location headers.
 *
 * Every "Location" header is resolved against the current URL, in order,
 * so the client ends up with the URL of the last redirection.
 *
 * The header is split on the first colon only: a URL containing ": " is
 * kept whole and a header without a space after the colon is accepted.
 * Headers with an empty value are ignored.
 *
 * @param array $headers List of HTTP response header
 */
private function setEffectiveUrl($headers)
{
    foreach ($headers as $header) {
        if (stripos($header, 'Location:') !== 0) {
            continue;
        }

        $parts = explode(':', $header, 2);
        $value = isset($parts[1]) ? trim($parts[1]) : '';

        if ($value !== '') {
            $this->url = Url::resolve($value, $this->url);
        }
    }
}
/**
 * Build the options array for the HTTP stream context.
 *
 * Proxy settings are only added when a proxy hostname is configured.
 *
 * @return array
 */
private function prepareContext()
{
    $http = array(
        'method' => 'GET',
        'protocol_version' => 1.1,
        'timeout' => $this->timeout,
        'max_redirects' => $this->max_redirects,
    );

    if ($this->proxy_hostname) {
        $proxy_address = $this->proxy_hostname.':'.$this->proxy_port;

        Logger::setMessage(get_called_class().' Proxy: '.$proxy_address);

        $http['proxy'] = 'tcp://'.$proxy_address;
        $http['request_fulluri'] = true;

        if (!$this->proxy_username) {
            Logger::setMessage(get_called_class().' Proxy credentials: No');
        } else {
            Logger::setMessage(get_called_class().' Proxy credentials: Yes');
        }
    }

    $http['header'] = implode("\r\n", $this->prepareHeaders());

    return array('http' => $http);
}
/**
 * Do the HTTP request.
 *
 * In passthrough mode the status code, the Content-Type and the body are
 * forwarded to the output. Otherwise the body is read up to the maximum
 * body size.
 *
 * The stream metadata is fetched again after the body has been read,
 * because the "timed_out" flag of the snapshot taken before reading cannot
 * reflect a timeout that happens while reading. The stream is closed
 * before any exception is thrown.
 *
 * @throws InvalidUrlException When the connection cannot be established
 * @throws MaxSizeException    When the body exceeds the maximum size
 * @throws TimeoutException    When reading the body timed out
 *
 * @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...]
 */
public function doRequest()
{
    $body = '';

    // Create context
    $context = stream_context_create($this->prepareContext());

    // Make HTTP request
    $stream = @fopen($this->url, 'r', false, $context);

    if (!is_resource($stream)) {
        throw new InvalidUrlException('Unable to establish a connection');
    }

    // Get HTTP headers response
    $metadata = stream_get_meta_data($stream);
    list($status, $headers) = HttpHeaders::parse($metadata['wrapper_data']);

    if ($this->isPassthroughEnabled()) {
        header(':', true, $status);

        if (isset($headers['Content-Type'])) {
            header('Content-Type: '.$headers['Content-Type']);
        }

        fpassthru($stream);
    } else {
        // Get the entire body until the max size
        $body = (string) stream_get_contents($stream, $this->max_body_size + 1);

        // The timeout flag is only meaningful after the read.
        $read_metadata = stream_get_meta_data($stream);
        $timed_out = !empty($read_metadata['timed_out']);
        $too_large = strlen($body) > $this->max_body_size;

        if ($too_large || $timed_out) {
            fclose($stream);
        }

        // If the body size is too large abort everything
        if ($too_large) {
            throw new MaxSizeException('Content size too large');
        }

        if ($timed_out) {
            throw new TimeoutException('Operation timeout');
        }
    }

    fclose($stream);

    $this->setEffectiveUrl($metadata['wrapper_data']);

    return array(
        'status' => $status,
        'body' => $this->decodeBody($body, $headers),
        'headers' => $headers,
    );
}
/**
 * Undo the transfer and content encodings announced by the headers.
 *
 * The chunked transfer encoding is removed first, then gzip.
 *
 * @param string      $body    Raw body
 * @param HttpHeaders $headers HTTP headers
 *
 * @return string
 */
public function decodeBody($body, HttpHeaders $headers)
{
    $is_chunked = isset($headers['Transfer-Encoding']) && $headers['Transfer-Encoding'] === 'chunked';

    if ($is_chunked) {
        $body = $this->decodeChunked($body);
    }

    $is_gzipped = isset($headers['Content-Encoding']) && $headers['Content-Encoding'] === 'gzip';

    return $is_gzipped ? gzdecode($body) : $body;
}
/**
 * Decode a chunked body.
 *
 * Each chunk is "<hex size>[;extensions]\r\n<data>\r\n"; a chunk of size 0
 * ends the body. Chunk extensions are ignored. Decoding stops at the first
 * malformed size line and returns what was decoded so far. When the input
 * does not start with a valid chunk size at all, it is returned unchanged
 * (it is presumably not chunked).
 *
 * @param string $str Raw body
 *
 * @return string Decoded body
 */
public function decodeChunked($str)
{
    $str = (string) $str;
    $whitespace = " \t\n\r\0\x0B";
    $total = strlen($str);
    $offset = strspn($str, $whitespace);
    $result = '';
    $found_chunk = false;

    while ($offset < $total) {
        $eol = strpos($str, "\r\n", $offset);

        if ($eol === false) {
            break;
        }

        // Get the chunk length, without the optional chunk extension
        $size = substr($str, $offset, $eol - $offset);
        $extension = strpos($size, ';');

        if ($extension !== false) {
            $size = substr($size, 0, $extension);
        }

        $size = trim($size);

        if ($size === '' || !ctype_xdigit($size)) {
            break;
        }

        $found_chunk = true;
        $length = (int) hexdec($size);

        // The zero-length chunk marks the end of the body
        if ($length === 0) {
            break;
        }

        // Append the chunk to the result
        $result .= substr($str, $eol + 2, $length);

        // Skip the chunk data and the line break that follows it
        $offset = $eol + 2 + $length;
        $offset += strspn($str, $whitespace, $offset);
    }

    return $found_chunk ? $result : $str;
}
}
picoFeed-0.1.18/lib/PicoFeed/Client/TimeoutException.php 0000664 0000000 0000000 00000000235 12656251722 0023007 0 ustar 00root root 0000000 0000000 url = $url;
$this->components = parse_url($url) ?: array();
// Issue with PHP < 5.4.7 and protocol relative url
if (version_compare(PHP_VERSION, '5.4.7', '<') && $this->isProtocolRelative()) {
$pos = strpos($this->components['path'], '/', 2);
if ($pos === false) {
$pos = strlen($this->components['path']);
}
$this->components['host'] = substr($this->components['path'], 2, $pos - 2);
$this->components['path'] = substr($this->components['path'], $pos);
}
}
/**
 * Turn a possibly relative url into an absolute one.
 *
 * Relative urls are resolved against the website url. Protocol relative
 * urls (starting with //) inherit the scheme of the website.
 *
 * @static
 *
 * @param mixed $item_url    Unknown url (can be relative or not)
 * @param mixed $website_url Website url
 *
 * @return string
 */
public static function resolve($item_url, $website_url)
{
    $link = is_string($item_url) ? new self($item_url) : $item_url;
    $website = is_string($website_url) ? new self($website_url) : $website_url;

    if (!$link->isRelativeUrl()) {
        if ($link->isProtocolRelative()) {
            $link->setScheme($website->getScheme());
        }

        return $link->getAbsoluteUrl();
    }

    // A relative path is appended to the directory of the website url,
    // an absolute path directly to its base url.
    $base_url = $link->isRelativePath()
        ? $website->getBaseUrl($website->getBasePath())
        : $website->getBaseUrl();

    return $link->getAbsoluteUrl($base_url);
}
/**
* Shortcut method to get a base url.
*
* Builds a Url instance from the given string and returns its getBaseUrl().
*
* @static
*
* @param string $url
*
* @return string
*/
public static function base($url)
{
$link = new self($url);
return $link->getBaseUrl();
}
/**
 * Build "scheme://host[:port]" followed by an optional suffix.
 *
 * @param string $suffix Add a suffix to the url
 *
 * @return string Base url, or an empty string when the url has no host
 */
public function getBaseUrl($suffix = '')
{
    if (!$this->hasHost()) {
        return '';
    }

    return $this->getScheme('://').$this->getHost().$this->getPort(':').$suffix;
}
/**
 * Build the absolute url.
 *
 * With a base url, the full path of this url (minus its leading slash) is
 * appended to the absolute form of the base. Without one, the url is built
 * from its own host, or is empty when it has no host.
 *
 * @param string $base_url Use this url as base url
 *
 * @return string
 */
public function getAbsoluteUrl($base_url = '')
{
    if (!$base_url) {
        return $this->hasHost() ? $this->getBaseUrl().$this->getFullPath() : '';
    }

    $base = new self($base_url);

    return $base->getAbsoluteUrl().substr($this->getFullPath(), 1);
}
/**
 * Tell whether the url is relative: no scheme and not protocol relative.
 *
 * @return bool
 */
public function isRelativeUrl()
{
    return !($this->hasScheme() || $this->isProtocolRelative());
}
/**
 * Return true if the path is relative.
 *
 * A path is relative when it is empty or does not start with a slash.
 *
 * @return bool
 */
public function isRelativePath()
{
    $path = $this->getPath();

    // Square brackets: the $path{0} offset syntax is deprecated in PHP 7.4
    // and a parse error in PHP 8.
    return empty($path) || $path[0] !== '/';
}
/**
 * Percent-encode the characters of a path that are not allowed in a URI.
 *
 * Existing percent-encoded sequences are left alone; a "%" that is not
 * followed by two hexadecimal digits is encoded.
 *
 * Imported from Guzzle library: https://github.com/guzzle/psr7/blob/master/src/Uri.php#L568-L582
 *
 * @param $path
 *
 * @return string
 */
public function filterPath($path, $charUnreserved = 'a-zA-Z0-9_\-\.~', $charSubDelims = '!\$&\'\(\)\*\+,;=')
{
    $pattern = '/(?:[^'.$charUnreserved.$charSubDelims.':@\/%]+|%(?![A-Fa-f0-9]{2}))/';

    $encode = function (array $matches) {
        return rawurlencode($matches[0]);
    };

    return preg_replace_callback($pattern, $encode, $path);
}
/**
 * Get the path component, filtered through filterPath().
 *
 * @return string Filtered path (an empty path component is filtered as '')
 */
public function getPath()
{
    $raw_path = empty($this->components['path']) ? '' : $this->components['path'];

    return $this->filterPath($raw_path);
}
/**
 * Get the directory part of the path, with a trailing slash.
 *
 * A path ending with a slash is kept as is, otherwise its dirname() is
 * used. Doubled slashes and backslash-slash pairs are collapsed into a
 * single slash.
 *
 * @return string
 */
public function getBasePath()
{
    $current_path = $this->getPath();
    $prefix = $this->isRelativePath() ? '/' : '';
    $directory = (substr($current_path, -1) === '/') ? $current_path : dirname($current_path);

    return preg_replace('/\\\\\/|\/\//', '/', $prefix.$directory.'/');
}
/**
 * Get the full path (path + querystring + fragment).
 *
 * The result always starts with a slash: one is added in front of a
 * relative path.
 *
 * @return string
 */
public function getFullPath()
{
    $full_path = $this->isRelativePath() ? '/' : '';
    $full_path .= $this->getPath();

    if (!empty($this->components['query'])) {
        $full_path .= '?'.$this->components['query'];
    }

    if (!empty($this->components['fragment'])) {
        $full_path .= '#'.$this->components['fragment'];
    }

    return $full_path;
}
/**
 * Get the hostname.
 *
 * @return string Hostname, or an empty string when there is none
 */
public function getHost()
{
    if (empty($this->components['host'])) {
        return '';
    }

    return $this->components['host'];
}
/**
* Return true if the url has a hostname.
*
* True when the 'host' component is set and not empty.
*
* @return bool
*/
public function hasHost()
{
return !empty($this->components['host']);
}
/**
 * Get the scheme, falling back to "http" when the url has none.
 *
 * The suffix is appended in both cases.
 *
 * @param string $suffix Suffix appended to the scheme
 *
 * @return string
 */
public function getScheme($suffix = '')
{
    $scheme = $this->hasScheme() ? $this->components['scheme'] : 'http';

    return $scheme.$suffix;
}
/**
* Set the scheme.
*
* Overwrites the 'scheme' component. Nothing is returned.
*
* @param string $scheme Set a scheme
*/
public function setScheme($scheme)
{
$this->components['scheme'] = $scheme;
}
/**
* Return true if the url has a scheme.
*
* True when the 'scheme' component is set and not empty.
*
* @return bool
*/
public function hasScheme()
{
return !empty($this->components['scheme']);
}
/**
 * Get the port, preceded by the given prefix.
 *
 * @param string $prefix Prefix to add when there is a port
 *
 * @return string Prefixed port, or an empty string when there is no port
 */
public function getPort($prefix = '')
{
    if (!$this->hasPort()) {
        return '';
    }

    return $prefix.$this->components['port'];
}
/**
* Return true if the url has a port.
*
* True when the 'port' component is set and not empty.
*
* @return bool
*/
public function hasPort()
{
return !empty($this->components['port']);
}
/**
 * Tell whether the original url string starts with "//".
 *
 * @return bool
 */
public function isProtocolRelative()
{
    $leading = substr($this->url, 0, 2);

    return $leading === '//';
}
}
picoFeed-0.1.18/lib/PicoFeed/Config/ 0000775 0000000 0000000 00000000000 12656251722 0016760 5 ustar 00root root 0000000 0000000 picoFeed-0.1.18/lib/PicoFeed/Config/Config.php 0000664 0000000 0000000 00000010222 12656251722 0020673 0 ustar 00root root 0000000 0000000 container[$parameter] = $arguments[0];
return $this;
} elseif ($prefix === 'get') {
$default_value = isset($arguments[0]) ? $arguments[0] : null;
return isset($this->container[$parameter]) ? $this->container[$parameter] : $default_value;
}
}
}
picoFeed-0.1.18/lib/PicoFeed/Encoding/ 0000775 0000000 0000000 00000000000 12656251722 0017301 5 ustar 00root root 0000000 0000000 picoFeed-0.1.18/lib/PicoFeed/Encoding/Encoding.php 0000664 0000000 0000000 00000001500 12656251722 0021534 0 ustar 00root root 0000000 0000000 array('controls', 'src'),
'video' => array('poster', 'controls', 'height', 'width', 'src'),
'source' => array('src', 'type'),
'dt' => array(),
'dd' => array(),
'dl' => array(),
'table' => array(),
'caption' => array(),
'tr' => array(),
'th' => array(),
'td' => array(),
'tbody' => array(),
'thead' => array(),
'h2' => array(),
'h3' => array(),
'h4' => array(),
'h5' => array(),
'h6' => array(),
'strong' => array(),
'em' => array(),
'code' => array(),
'pre' => array(),
'blockquote' => array(),
'p' => array(),
'ul' => array(),
'li' => array(),
'ol' => array(),
'br' => array(),
'del' => array(),
'a' => array('href'),
'img' => array('src', 'title', 'alt'),
'figure' => array(),
'figcaption' => array(),
'cite' => array(),
'time' => array('datetime'),
'abbr' => array('title'),
'iframe' => array('width', 'height', 'frameborder', 'src', 'allowfullscreen'),
'q' => array('cite'),
);
/**
* Scheme whitelist.
*
* For a complete list go to http://en.wikipedia.org/wiki/URI_scheme
*
* @var array
*/
private $scheme_whitelist = array(
'bitcoin:',
'callto:',
'ed2k://',
'facetime://',
'feed:',
'ftp://',
'geo:',
'git://',
'http://',
'https://',
'irc://',
'irc6://',
'ircs://',
'jabber:',
'magnet:',
'mailto:',
'nntp://',
'rtmp://',
'sftp://',
'sip:',
'sips:',
'skype:',
'smb://',
'sms:',
'spotify:',
'ssh:',
'steam:',
'svn://',
'tel:',
);
/**
* Iframe source whitelist, everything else is ignored.
*
* @var array
*/
private $iframe_whitelist = array(
'http://www.youtube.com',
'https://www.youtube.com',
'http://player.vimeo.com',
'https://player.vimeo.com',
'http://www.dailymotion.com',
'https://www.dailymotion.com',
'http://vk.com',
'https://vk.com',
);
/**
* Blacklisted resources.
*
* @var array
*/
private $media_blacklist = array(
'api.flattr.com',
'feeds.feedburner.com',
'share.feedsportal.com',
'da.feedsportal.com',
'rc.feedsportal.com',
'rss.feedsportal.com',
'res.feedsportal.com',
'res1.feedsportal.com',
'res2.feedsportal.com',
'res3.feedsportal.com',
'pi.feedsportal.com',
'rss.nytimes.com',
'feeds.wordpress.com',
'stats.wordpress.com',
'rss.cnn.com',
'twitter.com/home?status=',
'twitter.com/share',
'twitter_icon_large.png',
'www.facebook.com/sharer.php',
'facebook_icon_large.png',
'plus.google.com/share',
'www.gstatic.com/images/icons/gplus-16.png',
'www.gstatic.com/images/icons/gplus-32.png',
'www.gstatic.com/images/icons/gplus-64.png',
);
/**
* Attributes used for external resources.
*
* @var array
*/
private $media_attributes = array(
'src',
'href',
'poster',
);
/**
* Attributes that must be integer.
*
* @var array
*/
private $integer_attributes = array(
'width',
'height',
'frameborder',
);
/**
* Mandatory attributes for specified tags.
*
* @var array
*/
private $required_attributes = array(
'a' => array('href'),
'img' => array('src'),
'iframe' => array('src'),
'audio' => array('src'),
'source' => array('src'),
);
/**
* Add attributes to specified tags.
*
* @var array
*/
private $add_attributes = array(
'a' => array('rel' => 'noreferrer', 'target' => '_blank'),
'video' => array('controls' => 'true'),
);
/**
* List of filters to apply.
*
* @var array
*/
private $filters = array(
'filterAllowedAttribute',
'filterIntegerAttribute',
'rewriteAbsoluteUrl',
'filterIframeAttribute',
'filterBlacklistResourceAttribute',
'filterProtocolUrlAttribute',
'rewriteImageProxyUrl',
'secureIframeSrc',
'removeYouTubeAutoplay',
);
/**
* Website URL instance, used as the base when resolving relative resource URLs.
*
* @var \PicoFeed\Client\Url
*/
private $website;
/**
* Constructor.
*
* Keeps the website URL instance; rewriteAbsoluteUrl() hands it to
* Url::resolve() as the base for resource attributes.
*
* @param \PicoFeed\Client\Url $website Website url instance
*/
public function __construct(Url $website)
{
$this->website = $website;
}
/**
 * Apply filters to the attributes list.
 *
 * Each attribute is passed through every filter named in $this->filters, in
 * order. Filters receive the value by reference and may rewrite it in place.
 * The first filter that returns false removes the attribute and ends the
 * chain for that attribute.
 *
 * @param string $tag        Tag name
 * @param array  $attributes Attributes dictionary
 *
 * @return array Filtered attributes
 */
public function filter($tag, array $attributes)
{
    foreach ($attributes as $attribute => &$value) {
        foreach ($this->filters as $filter) {
            if (!$this->$filter($tag, $attribute, $value)) {
                unset($attributes[$attribute]);
                break;
            }
        }
    }

    // Drop the reference left by the by-reference foreach so the last
    // element of the returned array is no longer aliased to $value.
    unset($value);

    return $attributes;
}
/**
 * Return true if the attribute is whitelisted for the tag (attributes that are not allowed get removed).
 *
 * The lookup is strict so that a non-string whitelist entry (0, true, ...)
 * cannot match an arbitrary attribute name through type juggling.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value (not inspected)
 *
 * @return bool
 */
public function filterAllowedAttribute($tag, $attribute, $value)
{
    if (!isset($this->attribute_whitelist[$tag])) {
        return false;
    }

    return in_array($attribute, $this->attribute_whitelist[$tag], true);
}
/**
 * Return true if the value is acceptable for an integer attribute
 * (attributes listed in $integer_attributes whose value is not made of digits get removed).
 *
 * Attributes that are not listed in $integer_attributes are always accepted.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value
 *
 * @return bool
 */
public function filterIntegerAttribute($tag, $attribute, $value)
{
    if (!in_array($attribute, $this->integer_attributes)) {
        return true;
    }

    // ctype_digit() treats small native integers as ASCII codes, so handle
    // them explicitly instead of passing them through.
    if (is_int($value)) {
        return $value >= 0;
    }

    return is_string($value) && ctype_digit($value);
}
/**
 * Return true if the iframe source is allowed (iframes from other sources get removed).
 *
 * A whitelist entry matches when the source starts with it. For entries made
 * of a scheme and host only (no path, such as "https://vk.com"), the match
 * must additionally stop at a host boundary: end of string, "/", "?" or "#".
 * Without that check "https://vk.com.evil.example/" or
 * "https://vk.com@evil.example/" would pass a plain prefix comparison.
 * Entries that already contain a path keep the plain prefix behaviour.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value
 *
 * @return bool
 */
public function filterIframeAttribute($tag, $attribute, $value)
{
    if ($tag !== 'iframe' || $attribute !== 'src') {
        return true;
    }

    foreach ($this->iframe_whitelist as $url) {
        if (strpos($value, $url) !== 0) {
            continue;
        }

        $scheme_end = strpos($url, '://');
        $has_path = $scheme_end === false || strpos($url, '/', $scheme_end + 3) !== false;

        if ($has_path) {
            return true;
        }

        // First character following the whitelisted scheme://host prefix.
        $next = substr($value, strlen($url), 1);

        if ($next === false || $next === '' || strpos('/?#', $next) !== false) {
            return true;
        }
    }

    return false;
}
/**
 * Reject resource attributes whose value points to a blacklisted media.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value
 *
 * @return bool False when the attribute is a resource and its value is blacklisted, true otherwise
 */
public function filterBlacklistResourceAttribute($tag, $attribute, $value)
{
    $is_blacklisted = $this->isResource($attribute) && $this->isBlacklistedMedia($value);

    return !$is_blacklisted;
}
/**
 * Resolve the value of resource attributes against the website URL.
 *
 * Attributes that are not resources are left untouched.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value (rewritten in place)
 *
 * @return bool Always true
 */
public function rewriteAbsoluteUrl($tag, $attribute, &$value)
{
    if (!$this->isResource($attribute)) {
        return true;
    }

    $value = Url::resolve($value, $this->website);

    return true;
}
/**
 * Switch the src attribute of iframes from http to https to prevent
 * mixed active content.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value (rewritten in place)
 *
 * @return bool Always true
 */
public function secureIframeSrc($tag, $attribute, &$value)
{
    $is_iframe_src = $tag === 'iframe' && $attribute === 'src';

    if ($is_iframe_src && strpos($value, 'http://') === 0) {
        // Same as inserting an "s" after the leading "http".
        $value = 'https'.substr($value, 4);
    }

    return true;
}
/**
 * Turn off YouTube autoplay in iframe sources.
 *
 * Only https YouTube URLs are rewritten: "autoplay=1" in the query string
 * becomes "autoplay=0". The dot in the host name is matched literally.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value (rewritten in place)
 *
 * @return bool Always true
 */
public function removeYouTubeAutoplay($tag, $attribute, &$value)
{
    if ($tag === 'iframe' && $attribute === 'src') {
        $regex = '%^(https://(?:www\.)?youtube\.com/.*\?.*autoplay=)1(.*)%i';
        $rewritten = preg_replace($regex, '${1}0$2', $value);

        // preg_replace() returns null on a PCRE error; keep the original value then.
        if ($rewritten !== null) {
            $value = $rewritten;
        }
    }

    return true;
}
/**
 * Rewrite the src of images so that they are loaded through a proxy.
 *
 * When a protocol restriction is set, only URLs starting with
 * "<protocol>:" (case-insensitive) are rewritten. The proxy URL format takes
 * precedence over the proxy callback; the original URL is rawurlencode()d
 * before being injected in the format.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value (rewritten in place)
 *
 * @return bool Always true
 */
public function rewriteImageProxyUrl($tag, $attribute, &$value)
{
    if ($tag !== 'img' || $attribute !== 'src') {
        return true;
    }

    $protocol = $this->image_proxy_limit_protocol;

    if ($protocol !== '' && stripos($value, $protocol.':') !== 0) {
        return true;
    }

    if ($this->image_proxy_url) {
        $value = sprintf($this->image_proxy_url, rawurlencode($value));
    } elseif (is_callable($this->image_proxy_callback)) {
        $value = call_user_func($this->image_proxy_callback, $value);
    }

    return true;
}
/**
 * Reject resource attributes whose URL scheme is not whitelisted.
 *
 * @param string $tag       Tag name
 * @param string $attribute Attribute name
 * @param string $value     Attribute value
 *
 * @return bool False when the attribute is a resource with a scheme that is not allowed, true otherwise
 */
public function filterProtocolUrlAttribute($tag, $attribute, $value)
{
    $has_forbidden_scheme = $this->isResource($attribute) && !$this->isAllowedProtocol($value);

    return !$has_forbidden_scheme;
}
/**
 * Automatically add/override some attributes for specific tags.
 *
 * Values configured in $add_attributes win over values coming from the
 * document, so a feed cannot replace them (for instance rel or target on
 * links) when those attribute names happen to be whitelisted.
 *
 * @param string $tag        Tag name
 * @param array  $attributes Attributes list
 *
 * @return array
 */
public function addAttributes($tag, array $attributes)
{
    if (isset($this->add_attributes[$tag])) {
        // array_replace() keeps the original key order and overwrites existing keys.
        $attributes = array_replace($attributes, $this->add_attributes[$tag]);
    }

    return $attributes;
}
/**
 * Return true if all required attributes are present.
 *
 * The requirement for a tag may be a list of attribute names or a single
 * attribute name given as a string (['img' => 'src']); both are handled.
 *
 * @param string $tag        Tag name
 * @param array  $attributes Attributes list
 *
 * @return bool
 */
public function hasRequiredAttributes($tag, array $attributes)
{
    if (!isset($this->required_attributes[$tag])) {
        return true;
    }

    // The cast turns a single attribute name into a one-element list.
    foreach ((array) $this->required_attributes[$tag] as $attribute) {
        if (!isset($attributes[$attribute])) {
            return false;
        }
    }

    return true;
}
/**
 * Tell whether an attribute name is one of the media attributes
 * (attributes used to load external resources).
 *
 * @param string $attribute Attribute name
 *
 * @return bool
 */
public function isResource($attribute)
{
    $position = array_search($attribute, $this->media_attributes);

    return $position !== false;
}
/**
 * Tell whether a value starts with one of the whitelisted schemes.
 *
 * The comparison is a case-sensitive prefix match.
 *
 * @param string $value Attribute value
 *
 * @return bool
 */
public function isAllowedProtocol($value)
{
    $allowed = false;

    foreach ($this->scheme_whitelist as $scheme) {
        if (strpos($value, $scheme) === 0) {
            $allowed = true;
            break;
        }
    }

    return $allowed;
}
/**
 * Tell whether a URL contains one of the blacklisted fragments.
 *
 * The fragment may appear anywhere in the URL.
 *
 * @param string $resource Attribute value (URL)
 *
 * @return bool
 */
public function isBlacklistedMedia($resource)
{
    $blacklisted = false;

    foreach ($this->media_blacklist as $fragment) {
        if (strpos($resource, $fragment) !== false) {
            $blacklisted = true;
            break;
        }
    }

    return $blacklisted;
}
/**
 * Serialize the attributes as name="value" pairs separated by a space.
 *
 * Values go through Filter::escape(); names are written as they are.
 *
 * @param array $attributes Attributes
 *
 * @return string
 */
public function toHtml(array $attributes)
{
    $pairs = array();

    foreach ($attributes as $name => $raw_value) {
        $pairs[] = $name.'="'.Filter::escape($raw_value).'"';
    }

    return implode(' ', $pairs);
}
/**
 * Replace the tag/attribute whitelist.
 *
 * An empty list is ignored and the current whitelist is kept.
 *
 * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
 *
 * @return Attribute
 */
public function setWhitelistedAttributes(array $values)
{
    if ($values) {
        $this->attribute_whitelist = $values;
    }

    return $this;
}
/**
 * Replace the scheme whitelist.
 *
 * An empty list is ignored and the current whitelist is kept.
 *
 * @param array $values List of scheme: ['http://', 'ftp://']
 *
 * @return Attribute
 */
public function setSchemeWhitelist(array $values)
{
    if ($values) {
        $this->scheme_whitelist = $values;
    }

    return $this;
}
/**
 * Replace the list of media attributes (used to load external resources).
 *
 * An empty list is ignored and the current list is kept.
 *
 * @param array $values List of values: ['src', 'href']
 *
 * @return Attribute
 */
public function setMediaAttributes(array $values)
{
    if ($values) {
        $this->media_attributes = $values;
    }

    return $this;
}
/**
 * Replace the blacklist of external resources.
 *
 * An empty list is ignored and the current blacklist is kept.
 *
 * @param array $values List of tags: ['http://google.com/', '...']
 *
 * @return Attribute
 */
public function setMediaBlacklist(array $values)
{
    if ($values) {
        $this->media_blacklist = $values;
    }

    return $this;
}
/**
 * Replace the mandatory attributes of whitelisted tags.
 *
 * An empty list is ignored and the current requirements are kept.
 *
 * @param array $values List of tags: ['img' => 'src']
 *
 * @return Attribute
 */
public function setRequiredAttributes(array $values)
{
    if ($values) {
        $this->required_attributes = $values;
    }

    return $this;
}
/**
 * Replace the attributes automatically added to specific tags.
 *
 * An empty list is ignored and the current overrides are kept.
 *
 * @param array $values List of tags: ['a' => 'target="_blank"']
 *
 * @return Attribute
 */
public function setAttributeOverrides(array $values)
{
    if ($values) {
        $this->add_attributes = $values;
    }

    return $this;
}
/**
 * Replace the list of attributes that must hold an integer.
 *
 * An empty list is ignored and the current list is kept.
 *
 * @param array $values List of tags: ['width', 'height']
 *
 * @return Attribute
 */
public function setIntegerAttributes(array $values)
{
    if ($values) {
        $this->integer_attributes = $values;
    }

    return $this;
}
/**
 * Replace the list of allowed iframe sources.
 *
 * An empty list is ignored and the current whitelist is kept.
 *
 * @param array $values List of tags: ['http://www.youtube.com']
 *
 * @return Attribute
 */
public function setIframeWhitelist(array $values)
{
    if ($values) {
        $this->iframe_whitelist = $values;
    }

    return $this;
}
/**
 * Set the image proxy URL.
 *
 * The value is used as a sprintf() format that receives the url-encoded
 * original image URL. A falsy value is ignored and the current one is kept.
 *
 * @param string $url Proxy URL
 *
 * @return Attribute
 */
public function setImageProxyUrl($url)
{
    if ($url) {
        $this->image_proxy_url = $url;
    }

    return $this;
}
/**
 * Set the image proxy callback.
 *
 * A falsy value is ignored and the current callback is kept.
 *
 * @param \Closure $callback
 *
 * @return Attribute
 */
public function setImageProxyCallback($callback)
{
    if ($callback) {
        $this->image_proxy_callback = $callback;
    }

    return $this;
}
/**
 * Restrict the image proxy to URLs of a given protocol.
 *
 * A falsy value is ignored and the current restriction is kept.
 *
 * @param string $value
 *
 * @return Attribute
 */
public function setImageProxyProtocol($value)
{
    if ($value) {
        $this->image_proxy_limit_protocol = $value;
    }

    return $this;
}
}
picoFeed-0.1.18/lib/PicoFeed/Filter/Filter.php 0000664 0000000 0000000 00000007355 12656251722 0020750 0 ustar 00root root 0000000 0000000 ]*>\s*~i', '', $data);
}
/**
 * Remove the XML tag from a document.
 *
 * Strips the leading XML declaration, then every xml-stylesheet processing
 * instruction found near the beginning of the document. Nothing is removed
 * when the instruction has no closing marker.
 *
 * NOTE(review): the string literals of this method were lost in this copy of
 * the file; they are restored from the surrounding logic - confirm against
 * the upstream release.
 *
 * @static
 *
 * @param string $data Input data
 *
 * @return string
 */
public static function stripXmlTag($data)
{
    if (strpos($data, '<?xml') !== false) {
        $end = strpos($data, '?>');

        if ($end !== false) {
            $data = ltrim(substr($data, $end + 2));
        }
    }

    do {
        $pos = strpos($data, '<?xml-stylesheet ');

        if ($pos !== false) {
            $end = strpos($data, '?>');

            if ($end === false) {
                // Unterminated instruction: stop instead of cutting arbitrary content.
                break;
            }

            $data = ltrim(substr($data, $end + 2));
        }
    } while ($pos !== false && $pos < 200);

    return $data;
}
/**
 * Strip head tag from the HTML content.
 *
 * The head element is removed together with everything it contains. The
 * match is case-insensitive, spans several lines and is UTF-8 aware.
 *
 * NOTE(review): the tag names of the pattern were lost in this copy of the
 * file and are restored here - confirm against the upstream release.
 *
 * @static
 *
 * @param string $data Input data
 *
 * @return string
 */
public static function stripHeadTags($data)
{
    return preg_replace('@<head[^>]*?>.*?</head>@siu', '', $data);
}
/**
 * Replace every carriage return, tab and line feed by a space, then trim
 * both ends of the string.
 *
 * Plain byte replacements are used on purpose: a \s+ regular expression
 * breaks utf-8 strings.
 *
 * @static
 *
 * @param string $value Raw data
 *
 * @return string Normalized data
 */
public static function stripWhiteSpace($value)
{
    $normalized = str_replace(array("\r", "\t", "\n"), ' ', $value);

    return trim($normalized);
}
/**
 * Fixes before XML parsing.
 *
 * Two passes are applied:
 *  - numeric character references (decimal and hexadecimal) that designate a
 *    code point outside of the XML 1.0 character range are removed,
 *  - literal characters outside of that range are removed.
 *
 * NOTE(review): the entity prefixes of the two patterns were lost in this
 * copy of the file and are restored here - confirm against the upstream
 * release.
 *
 * @static
 *
 * @param string $data Raw data
 *
 * @return string Normalized data
 */
public static function normalizeData($data)
{
    $entities = array(
        '/(&#)(\d+);/m', // decimal encoded
        '/(&#x)([a-f0-9]+);/mi', // hex encoded
    );

    // strip invalid XML 1.0 characters which are encoded as entities
    $data = preg_replace_callback($entities, function ($matches) {
        $code_point = $matches[2];

        // convert hex entity to decimal
        if (strtolower($matches[1]) === '&#x') {
            $code_point = hexdec($code_point);
        }

        $code_point = (int) $code_point;

        // replace invalid characters
        if ($code_point < 9
            || ($code_point > 10 && $code_point < 13)
            || ($code_point > 13 && $code_point < 32)
            || ($code_point > 55295 && $code_point < 57344)
            || ($code_point > 65533 && $code_point < 65536)
            || $code_point > 1114111
        ) {
            return '';
        }

        return $matches[0];
    }, $data);

    // strip every utf-8 character that isn't in the range of valid XML 1.0 characters
    return (string) preg_replace('/[^\x{0009}\x{000A}\x{000D}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u', '', $data);
}
}
picoFeed-0.1.18/lib/PicoFeed/Filter/Html.php 0000664 0000000 0000000 00000014717 12656251722 0020427 0 ustar 00root root 0000000 0000000 config = new Config();
$this->input = XmlParser::htmlToXml($html);
$this->output = '';
$this->tag = new Tag($this->config);
$this->website = $website;
$this->attribute = new Attribute(new Url($website));
}
/**
 * Set config object.
 *
 * When a config is given, every filter setting it exposes is pushed to the
 * attribute and tag filters. Passing null only stores the value.
 *
 * @param \PicoFeed\Config\Config $config Config instance
 *
 * @return \PicoFeed\Filter\Html
 */
public function setConfig($config)
{
    $this->config = $config;

    if ($config === null) {
        return $this;
    }

    $attribute = $this->attribute;

    $attribute->setImageProxyCallback($config->getFilterImageProxyCallback());
    $attribute->setImageProxyUrl($config->getFilterImageProxyUrl());
    $attribute->setImageProxyProtocol($config->getFilterImageProxyProtocol());
    $attribute->setIframeWhitelist($config->getFilterIframeWhitelist(array()));
    $attribute->setIntegerAttributes($config->getFilterIntegerAttributes(array()));
    $attribute->setAttributeOverrides($config->getFilterAttributeOverrides(array()));
    $attribute->setRequiredAttributes($config->getFilterRequiredAttributes(array()));
    $attribute->setMediaBlacklist($config->getFilterMediaBlacklist(array()));
    $attribute->setMediaAttributes($config->getFilterMediaAttributes(array()));
    $attribute->setSchemeWhitelist($config->getFilterSchemeWhitelist(array()));
    $attribute->setWhitelistedAttributes($config->getFilterWhitelistedTags(array()));

    // The tag filter only needs the tag names, i.e. the keys of the same setting.
    $this->tag->setWhitelistedTags(array_keys($config->getFilterWhitelistedTags(array())));

    return $this;
}
/**
 * Run tags/attributes filtering.
 *
 * The input goes through preFilter(), is then parsed with the event based
 * XML parser (startTag(), endTag() and dataTag() build the output) and the
 * result goes through postFilter().
 *
 * @return string
 */
public function execute()
{
    $this->preFilter();

    $xml_parser = xml_parser_create();

    xml_set_object($xml_parser, $this);
    xml_set_element_handler($xml_parser, 'startTag', 'endTag');
    xml_set_character_data_handler($xml_parser, 'dataTag');
    // Tag names must reach the handlers with their original case.
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, false);
    xml_parse($xml_parser, $this->input, true);
    xml_parser_free($xml_parser);

    $this->postFilter();

    return $this->output;
}
/**
* Called before XML parsing.
*
* Replaces the input by the document returned by the tag filter once the
* blacklisted tags are removed.
*/
public function preFilter()
{
$this->input = $this->tag->removeBlacklistedTags($this->input);
}
/**
* Called after XML parsing.
*
* Cleans the generated output in four steps: empty tags are removed, the
* website specific filter rules are applied, multiple break tags are
* collapsed and the result is trimmed.
*/
public function postFilter()
{
$this->output = $this->tag->removeEmptyTags($this->output);
$this->output = $this->filterRules($this->output);
$this->output = $this->tag->removeMultipleBreakTags($this->output);
$this->output = trim($this->output);
}
/**
 * Apply the website specific filter rules to the content.
 *
 * The rules are loaded for the current website. Every entry of the "filter"
 * section whose pattern matches the full path of the website URL provides
 * search/replace regular expressions that are applied to the content.
 *
 * @param string $content the content that should be filtered
 *
 * @return string
 */
public function filterRules($content)
{
    // the constructor should require a config, then this fallback can be removed
    $config = $this->config === null ? new Config() : $this->config;

    $rule_loader = new RuleLoader($config);
    $rules = $rule_loader->getRules($this->website);

    $website_url = new Url($this->website);
    $sub_url = $website_url->getFullPath();

    if (!isset($rules['filter'])) {
        return $content;
    }

    foreach ($rules['filter'] as $pattern => $replacements) {
        if (!preg_match($pattern, $sub_url)) {
            continue;
        }

        foreach ($replacements as $search => $replace) {
            $content = preg_replace($search, $replace, $content);
        }
    }

    return $content;
}
/**
 * Parse opening tag.
 *
 * The tag is written to the output only when it is allowed and still has its
 * required attributes once they are filtered. Whether the tag was skipped is
 * pushed on the $empty_tags stack, which endTag() pops.
 *
 * @param resource $parser     XML parser
 * @param string   $tag        Tag name
 * @param array    $attributes Tag attributes
 */
public function startTag($parser, $tag, array $attributes)
{
    $this->empty = true;

    if ($this->tag->isAllowed($tag, $attributes)) {
        $filtered = $this->attribute->filter($tag, $attributes);

        if ($this->attribute->hasRequiredAttributes($tag, $filtered)) {
            $completed = $this->attribute->addAttributes($tag, $filtered);
            $html_attributes = $this->attribute->toHtml($completed);

            $this->output .= $this->tag->openHtmlTag($tag, $html_attributes);
            $this->empty = false;
        }
    }

    $this->empty_tags[] = $this->empty;
}
/**
 * Parse closing tag.
 *
 * Pops the flag pushed by startTag() for this element and writes the closing
 * tag unless the opening tag was skipped or the tag is not allowed.
 *
 * @param resource $parser XML parser
 * @param string   $tag    Tag name
 */
public function endTag($parser, $tag)
{
    $was_skipped = array_pop($this->empty_tags);

    if ($was_skipped) {
        return;
    }

    if ($this->tag->isAllowedTag($tag)) {
        $this->output .= $this->tag->closeHtmlTag($tag);
    }
}
/**
 * Parse tag content.
 *
 * The text is escaped and appended to the output.
 *
 * @param resource $parser  XML parser
 * @param string   $content Tag content
 */
public function dataTag($parser, $content)
{
    // "\xc2\xa0" is the UTF-8 encoded non-breaking space: use a normal space instead
    $text = strtr($content, array("\xc2\xa0" => ' '));

    $this->output .= Filter::escape($text);
}
}
picoFeed-0.1.18/lib/PicoFeed/Filter/Tag.php 0000664 0000000 0000000 00000011052 12656251722 0020223 0 ustar 00root root 0000000 0000000 config = $config;
}
/**
 * Tell whether a tag can be kept: it must be whitelisted and must not be a
 * pixel tracker.
 *
 * @param string $tag        Tag name
 * @param array  $attributes Attributes dictionary
 *
 * @return bool
 */
public function isAllowed($tag, array $attributes)
{
    if (!$this->isAllowedTag($tag)) {
        return false;
    }

    return !$this->isPixelTracker($tag, $attributes);
}
/**
 * Build the HTML opening tag.
 *
 * Self-closing tags end with "/>", the other ones with ">".
 *
 * @param string $tag        Tag name
 * @param string $attributes Attributes converted in html
 *
 * @return string
 */
public function openHtmlTag($tag, $attributes = '')
{
    $html = '<'.$tag;

    if (!empty($attributes)) {
        $html .= ' '.$attributes;
    }

    if ($this->isSelfClosingTag($tag)) {
        return $html.'/>';
    }

    return $html.'>';
}
/**
 * Return the HTML closing tag.
 *
 * Self-closing tags are already closed by openHtmlTag(), so an empty string
 * is returned for them.
 *
 * @param string $tag Tag name
 *
 * @return string
 */
public function closeHtmlTag($tag)
{
    if ($this->isSelfClosingTag($tag)) {
        return '';
    }

    return '</'.$tag.'>';
}
/**
 * Tell whether a tag is self-closing (br and img).
 *
 * @param string $tag Tag name
 *
 * @return bool
 */
public function isSelfClosingTag($tag)
{
    return in_array($tag, array('br', 'img'), true);
}
/**
 * Check if a tag is on the whitelist.
 *
 * Both the built-in whitelist and the tags whitelisted in the config are
 * taken into account.
 *
 * @param string $tag Tag name
 *
 * @return bool
 */
public function isAllowedTag($tag)
{
    $configured_tags = array_keys($this->config->getFilterWhitelistedTags(array()));

    return in_array($tag, $this->tag_whitelist) || in_array($tag, $configured_tags);
}
/**
 * Detect if an image tag is a pixel tracker, i.e. an img whose height and
 * width attributes both equal 1.
 *
 * @param string $tag        Tag name
 * @param array  $attributes Tag attributes
 *
 * @return bool
 */
public function isPixelTracker($tag, array $attributes)
{
    if ($tag !== 'img') {
        return false;
    }

    if (!isset($attributes['height']) || !isset($attributes['width'])) {
        return false;
    }

    return $attributes['height'] == 1 && $attributes['width'] == 1;
}
/**
 * Remove the blacklisted tags from a document.
 *
 * The document is loaded in a DOM, every node matched by the XPath
 * expressions of $tag_blacklist is detached from its parent and the document
 * is serialized back to XML. An empty string is returned when the document
 * cannot be loaded.
 *
 * @param string $data Input data
 *
 * @return string
 */
public function removeBlacklistedTags($data)
{
    $document = XmlParser::getDomDocument($data);

    if ($document === false) {
        return '';
    }

    $query = implode(' | ', $this->tag_blacklist);
    $xpath = new DOMXpath($document);

    foreach ($xpath->query($query) as $blacklisted_node) {
        $blacklisted_node->parentNode->removeChild($blacklisted_node);
    }

    return $document->saveXML();
}
/**
 * Remove the elements that contain nothing or only whitespace.
 *
 * @param string $data Input data
 *
 * @return string
 */
public function removeEmptyTags($data)
{
    $empty_element = '/<([^<\/>]*)>([\s]*?|(?R))<\/\1>/imsU';

    return preg_replace($empty_element, '', $data);
}
/**
 * Replace consecutive <br/> tags (possibly separated by whitespace) by only one.
 *
 * NOTE(review): the break tags of the pattern and of the replacement were
 * lost in this copy of the file and are restored here, in the form written
 * by openHtmlTag() - confirm against the upstream release.
 *
 * @param string $data Input data
 *
 * @return string
 */
public function removeMultipleBreakTags($data)
{
    return preg_replace("/(<br\/>\s*)+/", '<br/>', $data);
}
/**
 * Replace the tag whitelist.
 *
 * An empty list is ignored and the current whitelist is kept.
 *
 * @param array $values List of tag names: ['video', 'img']
 *
 * @return Tag
 */
public function setWhitelistedTags(array $values)
{
    if ($values) {
        $this->tag_whitelist = $values;
    }

    return $this;
}
}
picoFeed-0.1.18/lib/PicoFeed/Logging/ 0000775 0000000 0000000 00000000000 12656251722 0017141 5 ustar 00root root 0000000 0000000 picoFeed-0.1.18/lib/PicoFeed/Logging/Logger.php 0000664 0000000 0000000 00000003566 12656251722 0021103 0 ustar 00root root 0000000 0000000 format('Y-m-d H:i:s').'] '.$message;
}
}
/**
* Get all logged messages.
*
* Returns the static message list as it is, in insertion order.
*
* @static
*
* @return array
*/
public static function getMessages()
{
return self::$messages;
}
/**
* Remove all logged messages.
*
* Resets the static message list to an empty array.
*
* @static
*/
public static function deleteMessages()
{
self::$messages = array();
}
/**
 * Set a different timezone.
 *
 * A falsy value is ignored and the current timezone is kept.
 *
 * @static
 *
 * @see http://php.net/manual/en/timezones.php
 *
 * @param string $timezone Timezone
 */
public static function setTimeZone($timezone)
{
    if ($timezone) {
        self::$timezone = $timezone;
    }
}
/**
 * Get all messages serialized into a string.
 *
 * Messages are separated by PHP_EOL and the string ends with PHP_EOL.
 *
 * @static
 *
 * @return string
 */
public static function toString()
{
    $lines = implode(PHP_EOL, self::$messages);

    return $lines.PHP_EOL;
}
}
picoFeed-0.1.18/lib/PicoFeed/Parser/ 0000775 0000000 0000000 00000000000 12656251722 0017007 5 ustar 00root root 0000000 0000000 picoFeed-0.1.18/lib/PicoFeed/Parser/Atom.php 0000664 0000000 0000000 00000026117 12656251722 0020427 0 ustar 00root root 0000000 0000000 'http://www.w3.org/2005/Atom',
);
/**
 * Get the path to the items XML tree.
 *
 * Namespaced entries are looked up first, entries without namespace are the
 * fallback.
 *
 * @param SimpleXMLElement $xml Feed xml
 *
 * @return SimpleXMLElement
 */
public function getItemsTree(SimpleXMLElement $xml)
{
    $entries = XmlParser::getXPathResult($xml, 'atom:entry', $this->namespaces);

    if (!$entries) {
        $entries = XmlParser::getXPathResult($xml, 'entry');
    }

    return $entries;
}
/**
 * Find the feed url (href of the link with the "self" relationship).
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
{
    $self_url = $this->getUrl($xml, 'self');

    $feed->feed_url = $self_url;
}
/**
 * Find the site url (href of the "alternate" link, or of a link without
 * relationship as a fallback).
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
{
    $alternate_url = $this->getUrl($xml, 'alternate', true);

    $feed->site_url = $alternate_url;
}
/**
 * Find the feed description (subtitle element, namespaced first).
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
{
    $nodes = XmlParser::getXPathResult($xml, 'atom:subtitle', $this->namespaces);

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($xml, 'subtitle');
    }

    $feed->description = (string) current($nodes);
}
/**
 * Find the feed logo url (logo element, namespaced first).
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
{
    $nodes = XmlParser::getXPathResult($xml, 'atom:logo', $this->namespaces);

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($xml, 'logo');
    }

    $feed->logo = (string) current($nodes);
}
/**
 * Find the feed icon (icon element, namespaced first).
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
{
    $nodes = XmlParser::getXPathResult($xml, 'atom:icon', $this->namespaces);

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($xml, 'icon');
    }

    $feed->icon = (string) current($nodes);
}
/**
 * Find the feed title.
 *
 * The site url is used when the title is empty once the whitespace is
 * normalized.
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
{
    $nodes = XmlParser::getXPathResult($xml, 'atom:title', $this->namespaces);

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($xml, 'title');
    }

    $title = Filter::stripWhiteSpace((string) current($nodes));

    if (!$title) {
        $title = $feed->getSiteUrl();
    }

    $feed->title = $title;
}
/**
 * Find the feed language.
 *
 * The xml:lang attribute of the child elements that are not entries is
 * looked up first, then the one of the element itself.
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
{
    $nodes = XmlParser::getXPathResult($xml, '*[not(self::atom:entry)]/@xml:lang', $this->namespaces);

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($xml, '@xml:lang');
    }

    $feed->language = (string) current($nodes);
}
/**
 * Find the feed id (id element, namespaced first).
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findFeedId(SimpleXMLElement $xml, Feed $feed)
{
    $nodes = XmlParser::getXPathResult($xml, 'atom:id', $this->namespaces);

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($xml, 'id');
    }

    $feed->id = (string) current($nodes);
}
/**
 * Find the feed date (updated element, namespaced first), converted by the
 * date parser.
 *
 * @param SimpleXMLElement      $xml  Feed xml
 * @param \PicoFeed\Parser\Feed $feed Feed object
 */
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
{
    $nodes = XmlParser::getXPathResult($xml, 'atom:updated', $this->namespaces);

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($xml, 'updated');
    }

    $feed->date = $this->date->getDateTime((string) current($nodes));
}
/**
 * Find the item date.
 *
 * With both a published and an updated date, the most recent one wins. With
 * only one of them, that one is used. Without any, the feed date is used.
 *
 * @param SimpleXMLElement      $entry Feed item
 * @param Item                  $item  Item object
 * @param \PicoFeed\Parser\Feed $feed  Feed object
 */
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $published_nodes = XmlParser::getXPathResult($entry, 'atom:published', $this->namespaces);

    if (!$published_nodes) {
        $published_nodes = XmlParser::getXPathResult($entry, 'published');
    }

    $updated_nodes = XmlParser::getXPathResult($entry, 'atom:updated', $this->namespaces);

    if (!$updated_nodes) {
        $updated_nodes = XmlParser::getXPathResult($entry, 'updated');
    }

    $published = null;
    $updated = null;

    if (!empty($published_nodes)) {
        $published = $this->date->getDateTime((string) current($published_nodes));
    }

    if (!empty($updated_nodes)) {
        $updated = $this->date->getDateTime((string) current($updated_nodes));
    }

    if ($published !== null && $updated !== null) {
        // most recent date between published and updated
        $item->date = max($published, $updated);
    } elseif ($published === null && $updated === null) {
        // no date for the item: use the feed date
        $item->date = $feed->getDate();
    } else {
        $item->date = $updated ?: $published;
    }
}
/**
 * Find the item title.
 *
 * The item url is used when the title is empty once the whitespace is
 * normalized.
 *
 * @param SimpleXMLElement $entry Feed item
 * @param Item             $item  Item object
 */
public function findItemTitle(SimpleXMLElement $entry, Item $item)
{
    $nodes = XmlParser::getXPathResult($entry, 'atom:title', $this->namespaces);

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($entry, 'title');
    }

    $title = Filter::stripWhiteSpace((string) current($nodes));

    if (!$title) {
        $title = $item->url;
    }

    $item->title = $title;
}
/**
 * Find the item author.
 *
 * The author name of the entry is looked up first (namespaced, then without
 * namespace), the author name of the feed is the fallback.
 *
 * @param SimpleXMLElement      $xml   Feed
 * @param SimpleXMLElement      $entry Feed item
 * @param \PicoFeed\Parser\Item $item  Item object
 */
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
{
    $nodes = XmlParser::getXPathResult($entry, 'atom:author/atom:name', $this->namespaces);

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($entry, 'author/name');
    }

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($xml, 'atom:author/atom:name', $this->namespaces);
    }

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($xml, 'author/name');
    }

    $item->author = (string) current($nodes);
}
/**
 * Find the item content.
 *
 * @param SimpleXMLElement      $entry Feed item
 * @param \PicoFeed\Parser\Item $item  Item object
 */
public function findItemContent(SimpleXMLElement $entry, Item $item)
{
    $content = $this->getContent($entry);

    $item->content = $content;
}
/**
 * Find the item URL (href of the "alternate" link, or of a link without
 * relationship as a fallback).
 *
 * @param SimpleXMLElement      $entry Feed item
 * @param \PicoFeed\Parser\Item $item  Item object
 */
public function findItemUrl(SimpleXMLElement $entry, Item $item)
{
    $alternate_url = $this->getUrl($entry, 'alternate', true);

    $item->url = $alternate_url;
}
/**
 * Generate the item id.
 *
 * The id is generated from the id element of the entry when there is one,
 * from the title, the url and the content of the item otherwise.
 *
 * @param SimpleXMLElement      $entry Feed item
 * @param \PicoFeed\Parser\Item $item  Item object
 * @param \PicoFeed\Parser\Feed $feed  Feed object
 */
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $nodes = XmlParser::getXPathResult($entry, 'atom:id', $this->namespaces);

    if (!$nodes) {
        $nodes = XmlParser::getXPathResult($entry, 'id');
    }

    if (empty($nodes)) {
        $item->id = $this->generateId($item->getTitle(), $item->getUrl(), $item->getContent());

        return;
    }

    $item->id = $this->generateId((string) current($nodes));
}
/**
 * Find the item enclosure.
 *
 * The href of the "enclosure" link is resolved against the site url of the
 * feed; nothing is set when the entry has no such link.
 *
 * @param SimpleXMLElement      $entry Feed item
 * @param \PicoFeed\Parser\Item $item  Item object
 * @param \PicoFeed\Parser\Feed $feed  Feed object
 */
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $link = $this->findLink($entry, 'enclosure');

    if (!$link) {
        return;
    }

    $item->enclosure_url = Url::resolve((string) $link['href'], $feed->getSiteUrl());
    $item->enclosure_type = (string) $link['type'];
}
/**
 * Find the item language.
 *
 * The first xml:lang attribute found in the entry is used, the feed language
 * is the fallback.
 *
 * @param SimpleXMLElement      $entry Feed item
 * @param \PicoFeed\Parser\Item $item  Item object
 * @param \PicoFeed\Parser\Feed $feed  Feed object
 */
public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $nodes = XmlParser::getXPathResult($entry, './/@xml:lang');
    $entry_language = (string) current($nodes);

    if ($entry_language) {
        $item->language = $entry_language;
    } else {
        $item->language = $feed->language;
    }
}
/**
 * Get the URL from a link tag.
 *
 * @param SimpleXMLElement $xml      XML tag
 * @param string           $rel      Link relationship: alternate, enclosure, related, self, via
 * @param bool             $fallback When true, a link without relationship is used if no link matches $rel
 *
 * @return string The href of the link, an empty string when no link is found
 */
private function getUrl(SimpleXMLElement $xml, $rel, $fallback = false)
{
    $matching_link = $this->findLink($xml, $rel);

    if ($matching_link) {
        return (string) $matching_link['href'];
    }

    if (!$fallback) {
        return '';
    }

    $default_link = $this->findLink($xml, '');

    if ($default_link) {
        return (string) $default_link['href'];
    }

    return '';
}
/**
 * Get a link tag that match a relationship.
 *
 * Namespaced links are looked up first, links without namespace are the
 * fallback. An empty $rel matches the links that have no rel attribute.
 *
 * @param SimpleXMLElement $xml XML tag
 * @param string           $rel Link relationship: alternate, enclosure, related, self, via
 *
 * @return SimpleXMLElement|null
 */
private function findLink(SimpleXMLElement $xml, $rel)
{
    $links = XmlParser::getXPathResult($xml, 'atom:link', $this->namespaces)
        ?: XmlParser::getXPathResult($xml, 'link');

    // The query helper can report a failure with false: there is nothing to iterate then.
    if (!$links) {
        return null;
    }

    foreach ($links as $link) {
        if ($rel === (string) $link['rel']) {
            return $link;
        }
    }

    return null;
}
/**
 * Get the entry content.
 *
 * When the content element has child elements, their XML is concatenated.
 * When it only holds text, the text is returned. The summary element is used
 * when there is no content.
 *
 * @param SimpleXMLElement $entry XML Entry
 *
 * @return string
 */
private function getContent(SimpleXMLElement $entry)
{
    $content_nodes = XmlParser::getXPathResult($entry, 'atom:content', $this->namespaces);

    if (!$content_nodes) {
        $content_nodes = XmlParser::getXPathResult($entry, 'content');
    }

    $content = current($content_nodes);

    if (!empty($content) && count($content->children())) {
        $fragments = array();

        foreach ($content->children() as $child) {
            $fragments[] = $child->asXML();
        }

        return implode('', $fragments);
    }

    if (trim((string) $content) !== '') {
        return (string) $content;
    }

    $summary_nodes = XmlParser::getXPathResult($entry, 'atom:summary', $this->namespaces);

    if (!$summary_nodes) {
        $summary_nodes = XmlParser::getXPathResult($entry, 'summary');
    }

    return (string) current($summary_nodes);
}
}
picoFeed-0.1.18/lib/PicoFeed/Parser/DateParser.php 0000664 0000000 0000000 00000004752 12656251722 0021562 0 ustar 00root root 0000000 0000000 length ].
*
* @var array
*/
public $formats = array(
DATE_ATOM => null,
DATE_RSS => null,
DATE_COOKIE => null,
DATE_ISO8601 => null,
DATE_RFC822 => null,
DATE_RFC850 => null,
DATE_RFC1036 => null,
DATE_RFC1123 => null,
DATE_RFC2822 => null,
DATE_RFC3339 => null,
'D, d M Y H:i:s' => 25,
'D, d M Y h:i:s' => 25,
'D M d Y H:i:s' => 24,
'j M Y H:i:s' => 20,
'Y-m-d H:i:s' => 19,
'Y-m-d\TH:i:s' => 19,
'd/m/Y H:i:s' => 19,
'D, d M Y' => 16,
'Y-m-d' => 10,
'd-m-Y' => 10,
'm-d-Y' => 10,
'd.m.Y' => 10,
'm.d.Y' => 10,
'd/m/Y' => 10,
'm/d/Y' => 10,
);
/**
 * Try to parse all date format for broken feeds.
 *
 * The formats are tried in the order of $formats. When a format has a
 * length, the value is truncated to that length before parsing. The current
 * date and time is returned when no format matches.
 *
 * @param string $value Original date format
 *
 * @return DateTime
 */
public function getDateTime($value)
{
    $value = trim($value);

    foreach ($this->formats as $format => $length) {
        $candidate = $length === null ? $value : substr($value, 0, $length);
        $date = $this->getValidDate($format, $candidate);

        if ($date !== false) {
            return $date;
        }
    }

    return $this->getCurrentDateTime();
}
/**
 * Get a valid date from a given format.
 *
 * The date is accepted only when it is parsed without any error nor warning.
 *
 * @param string $format Date format
 * @param string $value  Original date value
 *
 * @return DateTime|bool The parsed date, false when the value does not match the format
 */
public function getValidDate($format, $value)
{
    $date = DateTime::createFromFormat($format, $value, new DateTimeZone($this->timezone));

    if ($date === false) {
        return false;
    }

    $errors = DateTime::getLastErrors();

    // Since PHP 8.2 getLastErrors() returns false, instead of an array of
    // zero counters, when the last parsing produced no error nor warning.
    if ($errors === false) {
        return $date;
    }

    if ($errors['error_count'] === 0 && $errors['warning_count'] === 0) {
        return $date;
    }

    return false;
}
/**
 * Get the current datetime in the configured timezone.
 *
 * @return DateTime
 */
public function getCurrentDateTime()
{
    $timezone = new DateTimeZone($this->timezone);

    return new DateTime('now', $timezone);
}
}
picoFeed-0.1.18/lib/PicoFeed/Parser/Feed.php 0000664 0000000 0000000 00000005651 12656251722 0020372 0 ustar 00root root 0000000 0000000 $property.PHP_EOL;
}
$output .= 'Feed::date = '.$this->date->format(DATE_RFC822).PHP_EOL;
$output .= 'Feed::isRTL() = '.($this->isRTL() ? 'true' : 'false').PHP_EOL;
$output .= 'Feed::items = '.count($this->items).' items'.PHP_EOL;
foreach ($this->items as $item) {
$output .= '----'.PHP_EOL;
$output .= $item;
}
return $output;
}
/**
* Get title.
*
* @return string Value of the title property
*/
public function getTitle()
{
return $this->title;
}
/**
* Get description.
*
* @return string Value of the description property
*/
public function getDescription()
{
return $this->description;
}
/**
* Get the logo url.
*/
public function getLogo()
{
// Logo url as stored on this object by the parser.
return $this->logo;
}
/**
* Get the icon url.
*/
public function getIcon()
{
// Icon url as stored on this object by the parser.
return $this->icon;
}
/**
* Get feed url.
*/
public function getFeedUrl()
{
// Url of the feed document itself (not of the website).
return $this->feed_url;
}
/**
* Get site url.
*/
public function getSiteUrl()
{
// Url of the website the feed belongs to.
return $this->site_url;
}
/**
* Get date.
*/
public function getDate()
{
// Returns the stored date object itself, not a copy.
return $this->date;
}
/**
* Get language.
*/
public function getLanguage()
{
// Language code of the feed; isRTL() derives the text direction from it.
return $this->language;
}
/**
* Get id.
*/
public function getId()
{
// Feed identifier as stored on this object by the parser.
return $this->id;
}
/**
* Get feed items.
*/
public function getItems()
{
// Items collected for this feed, in the order they were appended.
return $this->items;
}
/**
* Return true if the feed is "Right to Left".
*
* @return bool
*/
public function isRTL()
{
// The decision is delegated to the shared Parser helper, based on the feed language.
return Parser::isLanguageRTL($this->language);
}
}
picoFeed-0.1.18/lib/PicoFeed/Parser/Item.php 0000664 0000000 0000000 00000007677 12656251722 0020437 0 ustar 00root root 0000000 0000000 xml, $query, $this->namespaces);
if ($elements === false) { // xPath error
return false;
}
return array_map(function ($element) { return (string) $element;}, $elements);
}
/**
* Return item information.
*/
public function __toString()
{
    // One "Item::name = value" line per scalar property, in a fixed order.
    $lines = array();
    $names = array('id', 'title', 'url', 'language', 'author', 'enclosure_url', 'enclosure_type');

    foreach ($names as $name) {
        $lines[] = 'Item::'.$name.' = '.$this->$name;
    }

    // Derived values: formatted date, text direction and content size in bytes.
    $lines[] = 'Item::date = '.$this->date->format(DATE_RFC822);
    $lines[] = 'Item::isRTL() = '.($this->isRTL() ? 'true' : 'false');
    $lines[] = 'Item::content = '.strlen($this->content).' bytes';

    // Every line, including the last one, ends with PHP_EOL.
    return implode(PHP_EOL, $lines).PHP_EOL;
}
/**
* Get title.
*/
public function getTitle()
{
// Item title as stored on this object by the parser.
return $this->title;
}
/**
* Get url.
*/
public function getUrl()
{
// Item url as stored on this object by the parser.
return $this->url;
}
/**
* Get id.
*/
public function getId()
{
// Item identifier as stored on this object by the parser.
return $this->id;
}
/**
* Get date.
*/
public function getDate()
{
// Returns the stored date object itself, not a copy.
return $this->date;
}
/**
* Get content.
*/
public function getContent()
{
// Item content as currently stored (the parser may have filtered or replaced it).
return $this->content;
}
/**
* Get enclosure url.
*/
public function getEnclosureUrl()
{
// Url of the attached enclosure, as stored by the parser.
return $this->enclosure_url;
}
/**
* Get enclosure type.
*/
public function getEnclosureType()
{
// Type attribute of the attached enclosure, as stored by the parser.
return $this->enclosure_type;
}
/**
* Get language.
*/
public function getLanguage()
{
// Language code of the item; isRTL() derives the text direction from it.
return $this->language;
}
/**
* Get author.
*/
public function getAuthor()
{
// Author name as stored on this object by the parser.
return $this->author;
}
/**
* Return true if the item is "Right to Left".
*
* @return bool
*/
public function isRTL()
{
// The decision is delegated to the shared Parser helper, based on the item language.
return Parser::isLanguageRTL($this->language);
}
}
picoFeed-0.1.18/lib/PicoFeed/Parser/MalformedXmlException.php 0000664 0000000 0000000 00000000247 12656251722 0023771 0 ustar 00root root 0000000 0000000 date = new DateParser();
$this->fallback_url = $fallback_url;
$xml_encoding = XmlParser::getEncodingFromXmlTag($content);
// Strip XML tag to avoid multiple encoding/decoding in the next XML processing
$this->content = Filter::stripXmlTag($content);
// Encode everything in UTF-8
Logger::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"');
$this->content = Encoding::convert($this->content, $xml_encoding ?: $http_encoding);
}
/**
* Parse the document.
*
* @return \PicoFeed\Parser\Feed
*/
public function execute()
{
Logger::setMessage(get_called_class().': begin parsing');
// First attempt: parse the content as-is.
$xml = XmlParser::getSimpleXml($this->content);
if ($xml === false) {
// Second attempt: normalize the raw data, then parse again.
Logger::setMessage(get_called_class().': Applying XML workarounds');
$this->content = Filter::normalizeData($this->content);
$xml = XmlParser::getSimpleXml($this->content);
if ($xml === false) {
// Both attempts failed: log the libxml errors and give up.
Logger::setMessage(get_called_class().': XML parsing error');
Logger::setMessage(XmlParser::getErrors());
throw new MalformedXmlException('XML parsing error');
}
}
// Remember the namespaces declared in the document; each item receives a copy below.
$this->used_namespaces = $xml->getNamespaces(true);
$xml = $this->registerSupportedNamespaces($xml);
$feed = new Feed();
// Each find*() call is followed by its check*() call, which fills in or resolves the url.
$this->findFeedUrl($xml, $feed);
$this->checkFeedUrl($feed);
$this->findSiteUrl($xml, $feed);
$this->checkSiteUrl($feed);
$this->findFeedTitle($xml, $feed);
$this->findFeedDescription($xml, $feed);
$this->findFeedLanguage($xml, $feed);
$this->findFeedId($xml, $feed);
$this->findFeedDate($xml, $feed);
$this->findFeedLogo($xml, $feed);
$this->findFeedIcon($xml, $feed);
foreach ($this->getItemsTree($xml) as $entry) {
$entry = $this->registerSupportedNamespaces($entry);
$item = new Item();
// Keep the raw XML node and the document namespaces on the item.
$item->xml = $entry;
$item->namespaces = $this->used_namespaces;
$this->findItemAuthor($xml, $entry, $item);
$this->findItemUrl($entry, $item);
$this->checkItemUrl($feed, $item);
$this->findItemTitle($entry, $item);
$this->findItemContent($entry, $item);
// Id generation can use the item url/title/content (order is important)
$this->findItemId($entry, $item, $feed);
$this->findItemDate($entry, $item, $feed);
$this->findItemEnclosure($entry, $item, $feed);
$this->findItemLanguage($entry, $item, $feed);
// Order is important (avoid double filtering)
$this->filterItemContent($feed, $item);
$this->scrapWebsite($item);
$feed->items[] = $item;
}
// The feed is cast to string for the log message.
Logger::setMessage(get_called_class().PHP_EOL.$feed);
return $feed;
}
/**
* Check if the feed url is correct.
*
* @param Feed $feed Feed object
*/
public function checkFeedUrl(Feed $feed)
{
    $declared_url = $feed->getFeedUrl();

    // No url declared in the feed: use the fallback url as-is. Otherwise the
    // declared url is resolved against the fallback url.
    $feed->feed_url = $declared_url === ''
        ? $this->fallback_url
        : Url::resolve($declared_url, $this->fallback_url);
}
/**
* Check if the site url is correct.
*
* @param Feed $feed Feed object
*/
public function checkSiteUrl(Feed $feed)
{
    $declared_url = $feed->getSiteUrl();

    // No site url declared: derive it from the feed url. Otherwise the
    // declared url is resolved against the fallback url.
    $feed->site_url = $declared_url === ''
        ? Url::base($feed->getFeedUrl())
        : Url::resolve($declared_url, $this->fallback_url);
}
/**
* Check if the item url is correct.
*
* @param Feed $feed Feed object
* @param Item $item Item object
*/
public function checkItemUrl(Feed $feed, Item $item)
{
// The item url is resolved against the site url of the feed.
$item->url = Url::resolve($item->getUrl(), $feed->getSiteUrl());
}
/**
* Fetch item content with the content grabber.
*
* @param Item $item Item object
*/
public function scrapWebsite(Item $item)
{
    // Nothing to do unless the grabber has been switched on.
    if (!$this->enable_grabber) {
        return;
    }

    // Urls on the ignore list are never scraped.
    if (in_array($item->getUrl(), $this->grabber_ignore_urls)) {
        return;
    }

    $scraper = new Scraper($this->config);
    $scraper->setUrl($item->getUrl());

    if ($this->grabber_needs_rule_file) {
        $scraper->disableCandidateParser();
    }

    $scraper->execute();

    // The item content is replaced only when the scraper found something relevant.
    if ($scraper->hasRelevantContent()) {
        $item->content = $scraper->getFilteredContent();
    }
}
/**
* Filter HTML for entry content.
*
* @param Feed $feed Feed object
* @param Item $item Item object
*/
public function filterItemContent(Feed $feed, Item $item)
{
    // With filtering disabled the content is left untouched; only a log entry is written.
    if (!$this->isFilteringEnabled()) {
        Logger::setMessage(get_called_class().': Content filtering disabled');

        return;
    }

    $html_filter = Filter::html($item->getContent(), $feed->getSiteUrl());
    $html_filter->setConfig($this->config);
    $item->content = $html_filter->execute();
}
/**
* Generate a unique id for an entry (hash all arguments).
*
* @return string
*/
public function generateId()
{
    // All arguments are concatenated without a separator, then hashed
    // with the configured algorithm.
    $parts = func_get_args();

    return hash($this->hash_algo, implode('', $parts));
}
/**
* Return true if the given language is "Right to Left".
*
* @static
*
* @param string $language Language: fr-FR, en-US
*
* @return bool
*/
public static function isLanguageRTL($language)
{
    // Language prefixes that are written right to left.
    $prefixes = array(
        'ar',  // Arabic (ar-**)
        'fa',  // Farsi (fa-**)
        'ur',  // Urdu (ur-**)
        'ps',  // Pashtu (ps-**)
        'syr', // Syriac (syr-**)
        'dv',  // Divehi (dv-**)
        'he',  // Hebrew (he-**)
        'yi',  // Yiddish (yi-**)
    );

    // The comparison is case-insensitive and only looks at the start of the code.
    $code = strtolower($language);

    foreach ($prefixes as $prefix) {
        if (strncmp($code, $prefix, strlen($prefix)) === 0) {
            return true;
        }
    }

    return false;
}
/**
* Set Hash algorithm used for id generation.
*
* @param string $algo Algorithm name
*
* @return \PicoFeed\Parser\Parser
*/
public function setHashAlgo($algo)
{
    // An empty value keeps the current algorithm.
    if ($algo) {
        $this->hash_algo = $algo;
    }

    return $this;
}
/**
* Set a different timezone.
*
* @see http://php.net/manual/en/timezones.php
*
* @param string $timezone Timezone
*
* @return \PicoFeed\Parser\Parser
*/
public function setTimezone($timezone)
{
    // An empty value keeps the timezone already set on the date parser.
    if (!$timezone) {
        return $this;
    }

    $this->date->timezone = $timezone;

    return $this;
}
/**
* Set config object.
*
* @param \PicoFeed\Config\Config $config Config instance
*
* @return \PicoFeed\Parser\Parser
*/
public function setConfig($config)
{
// Stored as-is; the value is not validated here.
$this->config = $config;
// Returning $this allows call chaining.
return $this;
}
/**
* Disable the content filtering.
*
* @return \PicoFeed\Parser\Parser
*/
public function disableContentFiltering()
{
    // Default used by isFilteringEnabled() when the config does not say otherwise.
    $this->enable_filter = false;

    // Return the parser so the call can be chained like the other setters.
    return $this;
}
/**
* Return true if the content filtering is enabled.
*
* @return bool
*/
public function isFilteringEnabled()
{
    $default = $this->enable_filter;

    // Without a config object the local flag decides; otherwise the config
    // is asked, with the local flag passed along as its default.
    return $this->config === null
        ? $default
        : $this->config->getContentFiltering($default);
}
/**
* Enable the content grabber.
*
* @param bool $needs_rule_file true if only pages with rule files should be
* scraped
*
* @return \PicoFeed\Parser\Parser
*/
public function enableContentGrabber($needs_rule_file = false)
{
    $this->enable_grabber = true;
    // When true, scrapWebsite() disables the scraper's candidate parser.
    $this->grabber_needs_rule_file = $needs_rule_file;

    // Return the parser so the call can be chained like the other setters.
    return $this;
}
/**
* Set ignored URLs for the content grabber.
*
* @param array $urls URLs
*
* @return \PicoFeed\Parser\Parser
*/
public function setGrabberIgnoreUrls(array $urls)
{
    // Item urls found in this list are skipped by scrapWebsite().
    $this->grabber_ignore_urls = $urls;

    // Return the parser so the call can be chained like the other setters.
    return $this;
}
/**
* Register all supported namespaces to be used within an xpath query.
*
* @param SimpleXMLElement $xml Feed xml
*
* @return SimpleXMLElement
*/
public function registerSupportedNamespaces(SimpleXMLElement $xml)
{
    // Make every prefix of the parser's namespace map usable in XPath queries on $xml.
    foreach ($this->namespaces as $alias => $uri) {
        $xml->registerXPathNamespace($alias, $uri);
    }

    // The same element is handed back to the caller.
    return $xml;
}
/**
* Find the feed url.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findFeedUrl(SimpleXMLElement $xml, Feed $feed);
/**
* Find the site url.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findSiteUrl(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed title.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findFeedTitle(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed description.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findFeedDescription(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed language.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed id.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findFeedId(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed date.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findFeedDate(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed logo url.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findFeedLogo(SimpleXMLElement $xml, Feed $feed);
/**
* Find the feed icon.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findFeedIcon(SimpleXMLElement $xml, Feed $feed);
/**
* Get the path to the items XML tree.
*
* @param SimpleXMLElement $xml Feed xml
*
* @return SimpleXMLElement
*/
abstract public function getItemsTree(SimpleXMLElement $xml);
/**
* Find the item author.
*
* @param SimpleXMLElement $xml Feed
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
abstract public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item);
/**
* Find the item URL.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
abstract public function findItemUrl(SimpleXMLElement $entry, Item $item);
/**
* Find the item title.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
abstract public function findItemTitle(SimpleXMLElement $entry, Item $item);
/**
* Generate the item id.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed);
/**
* Find the item date.
*
* @param SimpleXMLElement $entry Feed item
* @param Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed);
/**
* Find the item content.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
abstract public function findItemContent(SimpleXMLElement $entry, Item $item);
/**
* Find the item enclosure.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed);
/**
* Find the item language.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
abstract public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed);
}
picoFeed-0.1.18/lib/PicoFeed/Parser/ParserException.php 0000664 0000000 0000000 00000000307 12656251722 0022633 0 ustar 00root root 0000000 0000000 'http://purl.org/rss/1.0/',
'dc' => 'http://purl.org/dc/elements/1.1/',
'content' => 'http://purl.org/rss/1.0/modules/content/',
'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
);
/**
* Get the path to the items XML tree.
*
* @param SimpleXMLElement $xml Feed xml
*
* @return SimpleXMLElement
*/
public function getItemsTree(SimpleXMLElement $xml)
{
    // Namespaced items first, un-namespaced items as a fallback.
    $items = XmlParser::getXPathResult($xml, 'rss:item', $this->namespaces);

    if (!$items) {
        $items = XmlParser::getXPathResult($xml, 'item');
    }

    return $items;
}
/**
* Find the feed url.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
{
// Nothing is read from the document here: the feed url is always left empty.
$feed->feed_url = '';
}
/**
* Find the site url.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
{
    // Namespaced channel link first, un-namespaced link as a fallback.
    $links = XmlParser::getXPathResult($xml, 'rss:channel/rss:link', $this->namespaces);

    if (!$links) {
        $links = XmlParser::getXPathResult($xml, 'channel/link');
    }

    // Only the first match is used.
    $feed->site_url = (string) current($links);
}
/**
* Find the feed description.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
{
    // Namespaced channel description first, un-namespaced one as a fallback.
    $descriptions = XmlParser::getXPathResult($xml, 'rss:channel/rss:description', $this->namespaces);

    if (!$descriptions) {
        $descriptions = XmlParser::getXPathResult($xml, 'channel/description');
    }

    // Only the first match is used.
    $feed->description = (string) current($descriptions);
}
/**
* Find the feed logo url.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
{
    // Namespaced image url first, un-namespaced one as a fallback.
    $urls = XmlParser::getXPathResult($xml, 'rss:image/rss:url', $this->namespaces);

    if (!$urls) {
        $urls = XmlParser::getXPathResult($xml, 'image/url');
    }

    // Only the first match is used.
    $feed->logo = (string) current($urls);
}
/**
* Find the feed icon.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
{
// Nothing is read from the document here: the icon is always left empty.
$feed->icon = '';
}
/**
* Find the feed title.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
{
    // Namespaced channel title first, un-namespaced one as a fallback.
    $titles = XmlParser::getXPathResult($xml, 'rss:channel/rss:title', $this->namespaces);

    if (!$titles) {
        $titles = XmlParser::getXPathResult($xml, 'channel/title');
    }

    $text = Filter::stripWhiteSpace((string) current($titles));

    // An empty title is replaced by the site url.
    $feed->title = $text ?: $feed->getSiteUrl();
}
/**
* Find the feed language.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
{
    // dc:language under the namespaced channel first, then under a plain channel.
    $languages = XmlParser::getXPathResult($xml, 'rss:channel/dc:language', $this->namespaces);

    if (!$languages) {
        $languages = XmlParser::getXPathResult($xml, 'channel/dc:language', $this->namespaces);
    }

    // Only the first match is used.
    $feed->language = (string) current($languages);
}
/**
* Find the feed id.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedId(SimpleXMLElement $xml, Feed $feed)
{
// The id is the feed url, or the site url when the feed url is empty.
$feed->id = $feed->getFeedUrl() ?: $feed->getSiteUrl();
}
/**
* Find the feed date.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
{
    // dc:date under the namespaced channel first, then under a plain channel.
    $dates = XmlParser::getXPathResult($xml, 'rss:channel/dc:date', $this->namespaces);

    if (!$dates) {
        $dates = XmlParser::getXPathResult($xml, 'channel/dc:date', $this->namespaces);
    }

    // The first match is handed to the date parser.
    $feed->date = $this->date->getDateTime((string) current($dates));
}
/**
* Find the item date.
*
* @param SimpleXMLElement $entry Feed item
* @param Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $dates = XmlParser::getXPathResult($entry, 'dc:date', $this->namespaces);

    // Items without a date take the date of the feed.
    if (empty($dates)) {
        $item->date = $feed->getDate();

        return;
    }

    $item->date = $this->date->getDateTime((string) current($dates));
}
/**
* Find the item title.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public function findItemTitle(SimpleXMLElement $entry, Item $item)
{
    // Namespaced title first, un-namespaced one as a fallback.
    $titles = XmlParser::getXPathResult($entry, 'rss:title', $this->namespaces);

    if (!$titles) {
        $titles = XmlParser::getXPathResult($entry, 'title');
    }

    $text = Filter::stripWhiteSpace((string) current($titles));

    // An empty title is replaced by the item url.
    $item->title = $text ?: $item->url;
}
/**
* Find the item author.
*
* @param SimpleXMLElement $xml Feed
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
{
    // The creator of the item itself wins over the creator of the channel.
    $authors = XmlParser::getXPathResult($entry, 'dc:creator', $this->namespaces);

    if (!$authors) {
        $authors = XmlParser::getXPathResult($xml, 'rss:channel/dc:creator', $this->namespaces);
    }

    if (!$authors) {
        $authors = XmlParser::getXPathResult($xml, 'channel/dc:creator', $this->namespaces);
    }

    // Only the first match is used.
    $item->author = (string) current($authors);
}
/**
* Find the item content.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public function findItemContent(SimpleXMLElement $entry, Item $item)
{
    $bodies = XmlParser::getXPathResult($entry, 'content:encoded', $this->namespaces);
    $has_encoded_content = trim((string) current($bodies)) !== '';

    // A missing or blank content:encoded falls back to the item description.
    if (!$has_encoded_content) {
        $bodies = XmlParser::getXPathResult($entry, 'rss:description', $this->namespaces);

        if (!$bodies) {
            $bodies = XmlParser::getXPathResult($entry, 'description');
        }
    }

    $item->content = (string) current($bodies);
}
/**
* Find the item URL.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public function findItemUrl(SimpleXMLElement $entry, Item $item)
{
    // Lookup order: feedburner:origLink, namespaced link, plain link.
    $links = XmlParser::getXPathResult($entry, 'feedburner:origLink', $this->namespaces);

    if (!$links) {
        $links = XmlParser::getXPathResult($entry, 'rss:link', $this->namespaces);
    }

    if (!$links) {
        $links = XmlParser::getXPathResult($entry, 'link');
    }

    // Only the first match is used, without surrounding whitespace.
    $item->url = trim((string) current($links));
}
/**
* Generate the item id.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    // The id is a hash over title, url and content, in that order.
    $title = $item->getTitle();
    $url = $item->getUrl();
    $content = $item->getContent();

    $item->id = $this->generateId($title, $url, $content);
}
/**
* Find the item enclosure.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
{
// Intentionally empty: this parser does not read any enclosure.
}
/**
* Find the item language.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $languages = XmlParser::getXPathResult($entry, 'dc:language', $this->namespaces);
    $code = (string) current($languages);

    // Items without their own language inherit the language of the feed.
    $item->language = $code ?: $feed->language;
}
}
picoFeed-0.1.18/lib/PicoFeed/Parser/Rss20.php 0000664 0000000 0000000 00000022262 12656251722 0020435 0 ustar 00root root 0000000 0000000 'http://purl.org/dc/elements/1.1/',
'content' => 'http://purl.org/rss/1.0/modules/content/',
'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0',
'atom' => 'http://www.w3.org/2005/Atom',
);
/**
* Get the path to the items XML tree.
*
* @param SimpleXMLElement $xml Feed xml
*
* @return SimpleXMLElement
*/
public function getItemsTree(SimpleXMLElement $xml)
{
// Items are looked up under the channel element; no namespace map is passed.
return XmlParser::getXPathResult($xml, 'channel/item');
}
/**
* Find the feed url.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
{
// Nothing is read from the document here: the feed url is always left empty.
$feed->feed_url = '';
}
/**
* Find the site url.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
{
    $links = XmlParser::getXPathResult($xml, 'channel/link');

    // Only the first channel link is used.
    $feed->site_url = (string) current($links);
}
/**
* Find the feed description.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
{
    $descriptions = XmlParser::getXPathResult($xml, 'channel/description');

    // Only the first channel description is used.
    $feed->description = (string) current($descriptions);
}
/**
* Find the feed logo url.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
{
    $urls = XmlParser::getXPathResult($xml, 'channel/image/url');

    // Only the first image url is used.
    $feed->logo = (string) current($urls);
}
/**
* Find the feed icon.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
{
// Nothing is read from the document here: the icon is always left empty.
$feed->icon = '';
}
/**
* Find the feed title.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
{
    $titles = XmlParser::getXPathResult($xml, 'channel/title');
    $text = Filter::stripWhiteSpace((string) current($titles));

    // An empty title is replaced by the site url.
    $feed->title = $text ?: $feed->getSiteUrl();
}
/**
* Find the feed language.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
{
    $languages = XmlParser::getXPathResult($xml, 'channel/language');

    // Only the first channel language is used.
    $feed->language = (string) current($languages);
}
/**
* Find the feed id.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedId(SimpleXMLElement $xml, Feed $feed)
{
// The id is the feed url, or the site url when the feed url is empty.
$feed->id = $feed->getFeedUrl() ?: $feed->getSiteUrl();
}
/**
* Find the feed date.
*
* @param SimpleXMLElement $xml Feed xml
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
{
    $pub_nodes = XmlParser::getXPathResult($xml, 'channel/pubDate');
    $build_nodes = XmlParser::getXPathResult($xml, 'channel/lastBuildDate');

    // Each date stays null when its element is absent from the channel.
    $published = null;
    if (!empty($pub_nodes)) {
        $published = $this->date->getDateTime((string) current($pub_nodes));
    }

    $updated = null;
    if (!empty($build_nodes)) {
        $updated = $this->date->getDateTime((string) current($build_nodes));
    }

    if ($published !== null && $updated !== null) {
        // Both present: keep the most recent of the two.
        $feed->date = max($published, $updated);
    } elseif ($published === null && $updated === null) {
        // Neither present: use the current date.
        $feed->date = $this->date->getCurrentDateTime();
    } else {
        // Exactly one present: use that one.
        $feed->date = $updated ?: $published;
    }
}
/**
* Find the item date.
*
* @param SimpleXMLElement $entry Feed item
* @param Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $dates = XmlParser::getXPathResult($entry, 'pubDate');

    // Items without a pubDate take the date of the feed.
    if (empty($dates)) {
        $item->date = $feed->getDate();

        return;
    }

    $item->date = $this->date->getDateTime((string) current($dates));
}
/**
* Find the item title.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public function findItemTitle(SimpleXMLElement $entry, Item $item)
{
    $titles = XmlParser::getXPathResult($entry, 'title');
    $text = Filter::stripWhiteSpace((string) current($titles));

    // An empty title is replaced by the item url.
    $item->title = $text ?: $item->url;
}
/**
* Find the item author.
*
* @param SimpleXMLElement $xml Feed
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
{
    // Lookup order: item dc:creator, item author, channel dc:creator,
    // channel managingEditor.
    $authors = XmlParser::getXPathResult($entry, 'dc:creator', $this->namespaces);

    if (!$authors) {
        $authors = XmlParser::getXPathResult($entry, 'author');
    }

    if (!$authors) {
        $authors = XmlParser::getXPathResult($xml, 'channel/dc:creator', $this->namespaces);
    }

    if (!$authors) {
        $authors = XmlParser::getXPathResult($xml, 'channel/managingEditor');
    }

    // Only the first match is used.
    $item->author = (string) current($authors);
}
/**
* Find the item content.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public function findItemContent(SimpleXMLElement $entry, Item $item)
{
    $bodies = XmlParser::getXPathResult($entry, 'content:encoded', $this->namespaces);
    $has_encoded_content = trim((string) current($bodies)) !== '';

    // A missing or blank content:encoded falls back to the item description.
    if (!$has_encoded_content) {
        $bodies = XmlParser::getXPathResult($entry, 'description');
    }

    $item->content = (string) current($bodies);
}
/**
* Find the item URL.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
*/
public function findItemUrl(SimpleXMLElement $entry, Item $item)
{
    // Lookup order: feedburner:origLink, link, href of atom:link.
    $links = XmlParser::getXPathResult($entry, 'feedburner:origLink', $this->namespaces);

    if (!$links) {
        $links = XmlParser::getXPathResult($entry, 'link');
    }

    if (!$links) {
        $links = XmlParser::getXPathResult($entry, 'atom:link/@href', $this->namespaces);
    }

    if (!empty($links)) {
        $item->url = trim((string) current($links));

        return;
    }

    // Last resort: the guid, but only when it is itself a valid url.
    // Otherwise the item url is left untouched.
    $guid = XmlParser::getXPathResult($entry, 'guid');
    $guid = trim((string) current($guid));

    if (filter_var($guid, FILTER_VALIDATE_URL) !== false) {
        $item->url = $guid;
    }
}
/**
* Generate the item id.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $guid = (string) current(XmlParser::getXPathResult($entry, 'guid'));

    // A non-empty guid is hashed alone; otherwise the hash covers title,
    // url and content, in that order.
    $item->id = $guid
        ? $this->generateId($guid)
        : $this->generateId($item->getTitle(), $item->getUrl(), $item->getContent());
}
/**
* Find the item enclosure.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    // Items without an enclosure element are left untouched.
    if (!isset($entry->enclosure)) {
        return;
    }

    // feedburner:origEnclosureLink wins over the url attribute of the enclosure.
    $urls = XmlParser::getXPathResult($entry, 'feedburner:origEnclosureLink', $this->namespaces);

    if (!$urls) {
        $urls = XmlParser::getXPathResult($entry, 'enclosure/@url');
    }

    $types = XmlParser::getXPathResult($entry, 'enclosure/@type');

    // The enclosure url is resolved against the site url of the feed.
    $item->enclosure_url = Url::resolve((string) current($urls), $feed->getSiteUrl());
    $item->enclosure_type = (string) current($types);
}
/**
* Find the item language.
*
* @param SimpleXMLElement $entry Feed item
* @param \PicoFeed\Parser\Item $item Item object
* @param \PicoFeed\Parser\Feed $feed Feed object
*/
public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
{
    $languages = XmlParser::getXPathResult($entry, 'dc:language', $this->namespaces);
    $code = (string) current($languages);

    // Items without their own language inherit the language of the feed.
    $item->language = $code ?: $feed->language;
}
}
picoFeed-0.1.18/lib/PicoFeed/Parser/Rss91.php 0000664 0000000 0000000 00000000175 12656251722 0020444 0 ustar 00root root 0000000 0000000 childNodes->length === 0) {
return false;
}
return $dom;
}
/**
* Small wrapper around ZendXml to turn their exceptions into picoFeed
* exceptions
* @param $input the xml to load
* @param $dom pass in a dom document or use null/omit if simpleXml should
* be used
*/
private static function scan($input, $dom=null)
{
try {
// The actual loading is delegated to Security::scan().
return Security::scan($input, $dom);
} catch(\ZendXml\Exception\RuntimeException $e) {
// Only the message is carried over to the picoFeed exception.
throw new XmlEntityException($e->getMessage());
}
}
/**
* Load HTML document by using a DomDocument instance or return false on failure.
*
* @static
*
* @param string $input XML content
*
* @return \DOMDocument
*/
public static function getHtmlDocument($input)
{
    $document = new DomDocument();

    // Empty input yields an empty document; nothing is loaded.
    if (!empty($input)) {
        // Parse errors are collected by libxml instead of being raised.
        libxml_use_internal_errors(true);

        // The options argument of loadHTML() is only passed on PHP >= 5.4.
        if (version_compare(PHP_VERSION, '5.4.0', '<')) {
            $document->loadHTML($input);
        } else {
            $document->loadHTML($input, LIBXML_NONET);
        }
    }

    return $document;
}
/**
* Convert a HTML document to XML.
*
* @static
*
* @param string $html HTML document
*
* @return string
*/
public static function htmlToXml($html)
{
    // loadHTML() does not treat the input as UTF-8 unless the markup declares
    // an encoding, so multi-byte characters would be mis-decoded. Prefixing an
    // XML declaration that names UTF-8 makes the parser read it correctly.
    $dom = self::getHtmlDocument('<?xml version="1.0" encoding="UTF-8">'.$html);

    // Only the body element is serialized.
    return $dom->saveXML($dom->getElementsByTagName('body')->item(0));
}
/**
* Get XML parser errors.
*
* @static
*
* @return string
*/
public static function getErrors()
{
    $template = 'XML error: %s (Line: %d - Column: %d - Code: %d)';

    // One formatted message per libxml error, in the order libxml reports them.
    $messages = array_map(
        function ($xml_error) use ($template) {
            return sprintf(
                $template,
                $xml_error->message,
                $xml_error->line,
                $xml_error->column,
                $xml_error->code
            );
        },
        libxml_get_errors()
    );

    // An empty error list gives an empty string.
    return implode(', ', $messages);
}
/**
* Get the encoding from a xml tag.
*
* @static
*
* @param string $data Input data
*
* @return string
*/
public static function getEncodingFromXmlTag($data)
{
    $encoding = '';

    /*
     * Look for the encoding pseudo-attribute inside the XML declaration only:
     * [^>]* cannot run past the end of the declaration. Both quote styles are
     * accepted, and whitespace around "=" is tolerated.
     */
    $pattern = '/<\?xml\b[^>]*?\bencoding\s*=\s*(["\'])(.*?)\1/i';

    if (preg_match($pattern, (string) $data, $match) === 1) {
        // Encoding names are returned in lower case.
        $encoding = strtolower($match[2]);
    }

    // Empty string when there is no declaration or no encoding in it.
    return $encoding;
}
/**
* Get the charset from a meta tag.
*
* @static
*
* @param string $data Input data
*
* @return string
*/
public static function getEncodingFromMetaTag($data)
{
    $encoding = '';

    /*
     * Matches both <meta charset="..."> and the http-equiv form, where the
     * charset sits inside the content attribute. The value stops at a quote,
     * whitespace, slash, closing bracket or semicolon.
     */
    $pattern = '/<meta.*?charset\s*=\s*["\']?\s*([^"\'\s\/>;]+)/i';

    if (preg_match($pattern, (string) $data, $match) === 1) {
        // Charset names are returned in lower case.
        $encoding = strtolower($match[1]);
    }

    // Empty string when no meta tag declares a charset.
    return $encoding;
}
/**
* Rewrite XPath query to use namespace-uri and local-name derived from prefix.
*
* @param string $query XPath query
* @param array $ns Prefix to namespace URI mapping
*
* @return string
*/
public static function replaceXPathPrefixWithNamespaceURI($query, array $ns)
{
    return preg_replace_callback(
        '/([A-Z0-9]+):([A-Z0-9]+)/iu',
        function ($matches) use ($ns) {
            list($qualified_name, $prefix, $local_name) = $matches;

            // don't try to map the special prefix XML
            if (strtolower($prefix) === 'xml') {
                return $qualified_name;
            }

            // A prefix missing from the map is left as written, instead of
            // reading an undefined index and matching on an empty namespace.
            if (!isset($ns[$prefix])) {
                return $qualified_name;
            }

            return '*[namespace-uri()="'.$ns[$prefix].'" and local-name()="'.$local_name.'"]';
        },
        $query
    );
}
/**
* Get the result elements of a XPath query.
*
* @param \SimpleXMLElement $xml XML element
* @param string $query XPath query
* @param array $ns Prefix to namespace URI mapping
*
* @return \SimpleXMLElement
*/
public static function getXPathResult(SimpleXMLElement $xml, $query, array $ns = array())
{
    // Prefixes are only rewritten when a namespace map is supplied.
    $expression = empty($ns)
        ? $query
        : static::replaceXPathPrefixWithNamespaceURI($query, $ns);

    // The result of xpath() is returned unchanged.
    return $xml->xpath($expression);
}
}
picoFeed-0.1.18/lib/PicoFeed/PicoFeedException.php 0000664 0000000 0000000 00000000253 12656251722 0021621 0 ustar 00root root 0000000 0000000 config = $config ?: new Config();
}
/**
* Get the icon file content (available only after the download).
*
* @return string
*/
public function getContent()
{
// Filled by find() with the body of the last downloaded icon candidate.
return $this->content;
}
/**
* Get the icon file type (available only after the download).
*
* @return string
*/
public function getType()
{
    // The first known type that the downloaded content type starts with wins.
    foreach ($this->types as $known_type) {
        $is_prefix = strpos($this->content_type, $known_type) === 0;

        if ($is_prefix) {
            return $known_type;
        }
    }

    // Content types outside the known list are reported as a plain icon.
    return 'image/x-icon';
}
/**
* Get data URI (http://en.wikipedia.org/wiki/Data_URI_scheme).
*
* @return string
*/
public function getDataUri()
{
    // Without downloaded content there is nothing to encode.
    if (empty($this->content)) {
        return '';
    }

    $mime_type = $this->getType();
    $payload = base64_encode($this->content);

    return sprintf('data:%s;base64,%s', $mime_type, $payload);
}
/**
 * Download a resource with the shared HTTP client.
 *
 * The client is configured with this object's config before the request.
 * A ClientException raised by the request is caught and only logged, so the
 * client instance is returned in every case.
 *
 * @param string $url URL
 *
 * @return \PicoFeed\Client\Client Client instance
 */
public function download($url)
{
    $logPrefix = get_called_class();

    $client = Client::getInstance();
    $client->setConfig($this->config);

    Logger::setMessage($logPrefix.' Download => '.$url);

    try {
        $client->execute($url);
    } catch (ClientException $failure) {
        Logger::setMessage($logPrefix.' Download Failed => '.$failure->getMessage());
    }

    return $client;
}
/**
 * Check if a remote file exists.
 *
 * The file is downloaded and considered present when the content reported
 * by the client is anything other than an empty string.
 *
 * @param string $url URL
 *
 * @return bool
 */
public function exists($url)
{
    $body = $this->download($url)->getContent();

    return $body !== '';
}
/**
 * Get the icon link for a website.
 *
 * Without an explicit favicon link, candidates are the icon links extracted
 * from the website's base page followed by "/favicon.ico". Each candidate is
 * resolved against the website URL and downloaded; the first one with
 * non-empty content is returned and its content and content type are stored.
 *
 * With an explicit favicon link, only that link is tried; when its content
 * is empty the lookup is restarted without it.
 *
 * @param string $website_link URL
 * @param string $favicon_link optional URL
 *
 * @return string Resolved icon URL, or an empty string when nothing was found
 */
public function find($website_link, $favicon_link = '')
{
    $website = new Url($website_link);
    $explicit = $favicon_link !== '';

    if ($explicit) {
        $candidates = array($favicon_link);
    } else {
        $homepage = $this->download($website->getBaseUrl('/'));
        $candidates = $this->extract($homepage->getContent());
        $candidates[] = $website->getBaseUrl('/favicon.ico');
    }

    foreach ($candidates as $candidate) {
        $candidate = Url::resolve($candidate, $website);
        $response = $this->download($candidate);

        $this->content = $response->getContent();
        $this->content_type = $response->getContentType();

        if ($this->content !== '') {
            return $candidate;
        }

        // The caller-supplied link did not work: fall back to discovery.
        if ($explicit) {
            return $this->find($website_link);
        }
    }

    return '';
}
/**
 * Extract the icon links from the HTML.
 *
 * Collects the href attribute of every <link> element whose rel attribute is
 * exactly "icon", "shortcut icon" or "icon shortcut", in document order.
 *
 * @param string $html HTML
 *
 * @return array List of href values (empty when $html is empty)
 */
public function extract($html)
{
    if (empty($html)) {
        return array();
    }

    $xpath = new DOMXpath(XmlParser::getHtmlDocument($html));
    $links = $xpath->query('//link[@rel="icon" or @rel="shortcut icon" or @rel="icon shortcut"]');

    $icons = array();
    foreach ($links as $link) {
        $icons[] = $link->getAttribute('href');
    }

    return $icons;
}
}
picoFeed-0.1.18/lib/PicoFeed/Reader/Reader.php 0000664 0000000 0000000 00000013077 12656251722 0020700 0 ustar 00root root 0000000 0000000 '//feed',
'Rss20' => '//rss[@version="2.0"]',
'Rss92' => '//rss[@version="0.92"]',
'Rss91' => '//rss[@version="0.91"]',
'Rss10' => '//rdf',
);
/**
* Config class instance.
*
* @var \PicoFeed\Config\Config
*/
private $config;
/**
 * Constructor.
 *
 * Uses the given config, or a default Config when none is passed, and
 * applies the configured timezone to the Logger.
 *
 * @param \PicoFeed\Config\Config $config Config class instance
 */
public function __construct(Config $config = null)
{
    if ($config === null) {
        $config = new Config();
    }

    $this->config = $config;
    Logger::setTimezone($this->config->getTimezone());
}
/**
 * Download a feed (no discovery).
 *
 * The URL is passed through prependScheme() first, then requested with a
 * client configured with the cache validators and credentials given here.
 *
 * @param string $url           Feed url
 * @param string $last_modified Last modified HTTP header
 * @param string $etag          Etag HTTP header
 * @param string $username      HTTP basic auth username
 * @param string $password      HTTP basic auth password
 *
 * @return \PicoFeed\Client\Client
 */
public function download($url, $last_modified = '', $etag = '', $username = '', $password = '')
{
    $target = $this->prependScheme($url);

    $client = Client::getInstance()
        ->setConfig($this->config)
        ->setLastModified($last_modified)
        ->setEtag($etag)
        ->setUsername($username)
        ->setPassword($password);

    return $client->execute($target);
}
/**
 * Discover and download a feed.
 *
 * The URL is downloaded first. If the response is not modified, or its
 * content is recognised as a feed, that response is returned directly.
 * Otherwise the content is searched for subscription links and the first
 * one found is downloaded with the same validators and credentials.
 *
 * @param string $url           Feed or website url
 * @param string $last_modified Last modified HTTP header
 * @param string $etag          Etag HTTP header
 * @param string $username      HTTP basic auth username
 * @param string $password      HTTP basic auth password
 *
 * @throws SubscriptionNotFoundException When no subscription link is found
 *
 * @return \PicoFeed\Client\Client
 */
public function discover($url, $last_modified = '', $etag = '', $username = '', $password = '')
{
    $client = $this->download($url, $last_modified, $etag, $username, $password);

    // It's already a feed or the feed was not modified
    $usableAsIs = !$client->isModified() || $this->detectFormat($client->getContent());
    if ($usableAsIs) {
        return $client;
    }

    // Try to find a subscription
    $candidates = $this->find($client->getUrl(), $client->getContent());
    if (empty($candidates)) {
        throw new SubscriptionNotFoundException('Unable to find a subscription');
    }

    return $this->download($candidates[0], $last_modified, $etag, $username, $password);
}
/**
 * Find feed urls inside a HTML document.
 *
 * Looks for <link> elements typed "application/rss+xml" first, then
 * "application/atom+xml". Links with an empty href are ignored and relative
 * links are made absolute using the base URL of the website.
 *
 * @param string $url  Website url
 * @param string $html HTML content
 *
 * @return array List of feed links
 */
public function find($url, $html)
{
    Logger::setMessage(get_called_class().': Try to discover subscriptions');

    $xpath = new DOMXPath(XmlParser::getHtmlDocument($html));
    $queries = array(
        '//link[@type="application/rss+xml"]',
        '//link[@type="application/atom+xml"]',
    );

    $links = array();
    foreach ($queries as $query) {
        foreach ($xpath->query($query) as $node) {
            $href = $node->getAttribute('href');
            if (empty($href)) {
                continue;
            }

            $feedUrl = new Url($href);
            $siteUrl = new Url($url);

            if ($feedUrl->isRelativeUrl()) {
                $base = $siteUrl->getBaseUrl();
            } else {
                $base = '';
            }

            $links[] = $feedUrl->getAbsoluteUrl($base);
        }
    }

    Logger::setMessage(get_called_class().': '.implode(', ', $links));

    return $links;
}
/**
 * Get a parser instance.
 *
 * The feed format is detected from the content and the matching class of
 * the \PicoFeed\Parser namespace is instantiated, then configured with the
 * hash algorithm, timezone and config held by this reader.
 *
 * @param string $url      Site url
 * @param string $content  Feed content
 * @param string $encoding HTTP encoding
 *
 * @throws UnsupportedFeedFormatException When the format cannot be detected
 *
 * @return \PicoFeed\Parser\Parser
 */
public function getParser($url, $content, $encoding)
{
    $format = $this->detectFormat($content);
    if (!$format) {
        throw new UnsupportedFeedFormatException('Unable to detect feed format');
    }

    $parserClass = '\PicoFeed\Parser\\'.$format;
    $settings = $this->config;

    $parser = new $parserClass($content, $encoding, $url);
    $parser->setHashAlgo($settings->getParserHashAlgo());
    $parser->setTimezone($settings->getTimezone());
    $parser->setConfig($settings);

    return $parser;
}
/**
 * Detect the feed format.
 *
 * Each known format is tried in the order of the formats list; the first
 * one whose XPath query matches exactly one node wins.
 *
 * @param string $content Feed content
 *
 * @return string Parser name, or an empty string when nothing matches
 */
public function detectFormat($content)
{
    $xpath = new DOMXPath(XmlParser::getHtmlDocument($content));

    foreach ($this->formats as $parser_name => $query) {
        if ($xpath->query($query)->length === 1) {
            return $parser_name;
        }
    }

    return '';
}
/**
 * Add the prefix "http://" if the end-user just entered a domain name.
 *
 * URLs that already start with "http://" or "https://" (in any letter case)
 * are returned unchanged.
 *
 * @param string $url Url
 *
 * @return string
 */
public function prependScheme($url)
{
    // URI schemes are case-insensitive, so "HTTP://host" must not be turned
    // into "http://HTTP://host".
    if (!preg_match('%^https?://%i', $url)) {
        $url = 'http://'.$url;
    }

    return $url;
}
}
picoFeed-0.1.18/lib/PicoFeed/Reader/ReaderException.php 0000664 0000000 0000000 00000000307 12656251722 0022547 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://combat.blog.lemonde.fr/2013/08/31/teddy-riner-le-rookie-devenu-rambo/#xtor=RSS-3208',
'body' => array(
'//div[@class="entry-content"]',
),
'strip' => array(
'//*[contains(@class, "fb-like") or contains(@class, "social")]'
),
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/.blogs.nytimes.com.php 0000664 0000000 0000000 00000000666 12656251722 0023016 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'title' => '//header/h1',
'test_url' => 'http://bits.blogs.nytimes.com/2012/01/16/wikipedia-plans-to-go-dark-on-wednesday-to-protest-sopa/',
'body' => array(
'//div[@class="postContent"]',
),
'strip' => array(
'//*[@class="shareToolsBox"]',
),
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/.igen.fr.php 0000664 0000000 0000000 00000000546 12656251722 0020771 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.igen.fr/ailleurs/2014/05/nvidia-va-delaisser-les-smartphones-grand-public-86031',
'body' => array(
'//div[contains(@class, "field-name-body")]'
),
'strip' => array(
),
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/.nytimes.com.php 0000664 0000000 0000000 00000000421 12656251722 0021676 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.nytimes.com/2011/05/15/world/middleeast/15prince.html',
'body' => array(
'//div[@class="articleBody"]',
),
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/.phoronix.com.php 0000664 0000000 0000000 00000000464 12656251722 0022063 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1',
'body' => array(
'//div[@class="content"]',
),
'strip' => array()
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/.slate.com.php 0000664 0000000 0000000 00000001534 12656251722 0021324 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.slate.com/articles/business/moneybox/2013/08/microsoft_ceo_steve_ballmer_retires_a_firsthand_account_of_the_company_s.html',
'body' => array(
'//div[@class="sl-art-body"]',
),
'strip' => array(
'//*[contains(@class, "social") or contains(@class, "comments") or contains(@class, "sl-article-floatin-tools") or contains(@class, "sl-art-pag")]',
'//*[@id="mys_slate_logged_in"]',
'//*[@id="sl_article_tools_myslate_bottom"]',
'//*[@id="mys_myslate"]',
'//*[@class="sl-viral-container"]',
'//*[@class="sl-art-creds-cntr"]',
'//*[@class="sl-art-ad-midflex"]',
)
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/.theguardian.com.php 0000664 0000000 0000000 00000000673 12656251722 0022512 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.theguardian.com/sustainable-business/2015/feb/02/2015-hyper-transparency-global-business',
'body' => array(
'//div[contains(@class, "content__main-column--article")]',
),
'strip' => array(
'//div[contains(@class, "meta-container")]',
),
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/.wikipedia.org.php 0000664 0000000 0000000 00000002066 12656251722 0022174 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'https://en.wikipedia.org/wiki/Grace_Hopper',
'body' => array(
'//div[@id="bodyContent"]',
),
'strip' => array(
"//div[@id='toc']",
"//div[@id='catlinks']",
"//div[@id='jump-to-nav']",
"//div[@class='thumbcaption']//div[@class='magnify']",
"//table[@class='navbox']",
"//table[contains(@class, 'infobox')]",
"//div[@class='dablink']",
"//div[@id='contentSub']",
"//div[@id='siteSub']",
"//table[@id='persondata']",
"//table[contains(@class, 'metadata')]",
"//*[contains(@class, 'noprint')]",
"//*[contains(@class, 'printfooter')]",
"//*[contains(@class, 'editsection')]",
"//*[contains(@class, 'error')]",
"//span[@title='pronunciation:']",
),
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/.wired.com.php 0000664 0000000 0000000 00000002152 12656251722 0021323 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.wired.com/gamelife/2013/09/ouya-free-the-games/',
'body' => array(
'//div[@data-js="gallerySlides"]',
'//article',
),
'strip' => array(
    '//*[@id="linker_widget"]',
    '//*[@class="credit"]',
    '//div[@data-js="slideCount"]',
    // contains() takes two arguments: the attribute and the substring to look for
    '//*[contains(@class, "visually-hidden")]',
    '//*[@data-slide-number="_endslate"]',
    '//*[@id="related"]',
    '//*[contains(@class, "bio")]',
    '//*[contains(@class, "entry-footer")]',
    '//*[contains(@class, "mobify_backtotop_link")]',
    '//*[contains(@class, "gallery-navigation")]',
    '//*[contains(@class, "gallery-thumbnail")]',
    '//img[contains(@src, "1x1")]',
    '//a[contains(@href, "creativecommons")]',
    '//a[@href="#start-of-content"]',
    '//ul[@id="article-tags"]',
),
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/.wsj.com.php 0000664 0000000 0000000 00000000652 12656251722 0021017 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://online.wsj.com/article/SB10001424127887324108204579023143974408428.html',
'body' => array(
'//div[@class="articlePage"]',
),
'strip' => array(
'//*[@id="articleThumbnail_2"]',
'//*[@class="socialByline"]',
)
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/01net.com.php 0000664 0000000 0000000 00000001146 12656251722 0021064 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.01net.com/editorial/624550/twitter-rachete-madbits-un-specialiste-francais-de-lanalyse-dimages/',
'body' => array(
'//div[@class="article_ventre_box"]',
),
'strip' => array(
'//link',
'//*[contains(@class, "article_navigation")]',
'//h1',
'//*[contains(@class, "article_toolbarMain")]',
'//*[contains(@class, "article_imagehaute_box")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/abstrusegoose.com.php 0000664 0000000 0000000 00000000235 12656251722 0023020 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%alt="(.+)" title="(.+)" */>%' => '/>
$1
$2',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/alainonline.net.php 0000664 0000000 0000000 00000000603 12656251722 0022433 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.alainonline.net/news_details.php?lang=arabic&sid=18907',
'body' => array(
'//div[@class="news_details"]',
),
'strip' => array(
'//div[@class="news_details"]/div/div[last()]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/aljazeera.com.php 0000664 0000000 0000000 00000001245 12656251722 0022073 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.aljazeera.com/news/2015/09/xi-jinping-seattle-china-150922230118373.html',
'body' => array(
'//figure[@class="article-content"]',
'//div[@class="article-body"]',
),
'strip' => array(
'//h1',
'//h3',
'//ul',
'//table[contains(@class, "in-article-item")]',
'//a[@target="_self"]',
'//div[@data-embed-type="Brightcove"]',
'//div[@class="QuoteContainer"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/allafrica.com.php 0000664 0000000 0000000 00000001050 12656251722 0022045 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
// NOTE(review): this test URL points at aljazeera.com, not allafrica.com, and is
// identical to the test URL of the aljazeera.com rule; it looks like a copy/paste
// leftover. Confirm and replace it with an allafrica.com article URL.
'test_url' => 'http://www.aljazeera.com/news/2015/09/xi-jinping-seattle-china-150922230118373.html',
'body' => array(
'//div[@class="story-body"]',
),
'strip' => array(
'//p[@class="kindofstory"]',
'//cite[@class="byline"]',
'//div[contains(@class,"related-topics")]',
'//links',
'//sharebar',
'//related-topics',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/allgemeine-zeitung.de.php 0000664 0000000 0000000 00000001540 12656251722 0023532 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.allgemeine-zeitung.de/lokales/polizei/mainz-gonsenheim-unbekannte-rauben-esso-tankstelle-in-kurt-schumacher-strasse-aus_14913147.htm',
'body' => array(
'//div[contains(@class, "article")][1]',
),
'strip' => array(
'//read/h1',
'//*[@id="t-map"]',
'//*[contains(@class, "modules")]',
'//*[contains(@class, "adsense")]',
'//*[contains(@class, "linkbox")]',
'//*[contains(@class, "info")]',
'//*[@class="skip"]',
'//*[@class="funcs"]',
'//span[@class="nd address"]',
'//a[contains(@href, "abo-und-services")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/amazingsuperpowers.com.php 0000664 0000000 0000000 00000000213 12656251722 0024074 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/anythingcomic.com.php 0000664 0000000 0000000 00000000552 12656251722 0022771 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array(
'//img[@id="comic_image"]',
'//div[@class="comment-wrapper"][position()=1]',
),
'strip' => array(),
'test_url' => 'http://www.anythingcomic.com/comics/2108929/stress-free/',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/ap.org.php 0000664 0000000 0000000 00000000606 12656251722 0020546 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://hosted.ap.org/dynamic/stories/A/AS_CHINA_GAO_ZHISHENG?SITE=AP&SECTION=HOME&TEMPLATE=DEFAULT',
'body' => array(
'//img[@class="ap-smallphoto-img"]',
'//span[@class="entry-content"]',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/areadvd.de.php 0000664 0000000 0000000 00000000443 12656251722 0021354 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.areadvd.de/news/daily-deals-angebote-bei-lautsprecher-teufel-3/',
'body' => array('//div[contains(@class,"entry")]'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/arstechnica.com.php 0000664 0000000 0000000 00000001364 12656251722 0022423 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://arstechnica.com/tech-policy/2015/09/judge-warners-2m-happy-birthday-copyright-is-bogus/',
'body' => array(
'//header/h2',
'//section[@id="article-guts"]',
'//div[@class="superscroll-content show"]',
'//div[@class="gallery"]',
),
'next_page' => '//span[@class="numbers"]/a',
'strip' => array(
'//figcaption',
'//div[@class="post-meta"]',
'//div[@class="gallery-image-credit"]',
'//aside',
'//div[@class="article-expander"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/awkwardzombie.com.php 0000664 0000000 0000000 00000000423 12656251722 0023000 0 ustar 00root root 0000000 0000000 array(
'%/index.php.*comic=.*%' => array(
'test_url' => 'http://www.awkwardzombie.com/index.php?comic=041315',
'body' => array('//*[@id="comic"]/img'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/bangkokpost.com.php 0000664 0000000 0000000 00000001046 12656251722 0022456 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.bangkokpost.com/news/politics/704204/new-us-ambassador-arrives-in-bangkok',
'body' => array(
'//div[@class="articleContents"]',
),
'strip' => array(
'//h2',
'//h4',
'//div[@class="text-size"]',
'//div[@class="relate-story"]',
'//div[@class="text-ads"]',
'//script',
'//ul',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/bgr.com.php 0000664 0000000 0000000 00000000571 12656251722 0020710 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://bgr.com/2015/09/27/iphone-6s-waterproof-testing/',
'body' => array(
'//img[contains(@class,"img")]',
'//div[@class="text-column"]',
),
'strip' => array(
'//strong',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/bigfootjustice.com.php 0000664 0000000 0000000 00000000172 12656251722 0023153 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%-150x150%' => '',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/bizjournals.com.php 0000664 0000000 0000000 00000000550 12656251722 0022475 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.bizjournals.com/milwaukee/news/2015/09/30/bucks-will-hike-prices-on-best-seats-at-new-arena.html',
'body' => array(
'//figure/div/a/img',
'//p[@class="content__segment"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/blog.fefe.de.php 0000664 0000000 0000000 00000000423 12656251722 0021573 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://blog.fefe.de/?ts=ad706a73',
'body' => array(
'/html/body/ul',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/blog.mapillary.com.php 0000664 0000000 0000000 00000000453 12656251722 0023051 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://blog.mapillary.com/update/2015/08/26/traffic-sign-updates.html',
'body' => array(
'//div[contains(@class, "blog-post__content")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/buenosairesherald.com.php 0000664 0000000 0000000 00000001076 12656251722 0023636 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.buenosairesherald.com/article/199344/manzur-named-next-governor-of-tucum%C3%A1n',
'body' => array(
'//div[@style="float:none"]',
),
'strip' => array(
    // The contains() call needs its closing parenthesis to be a valid XPath expression
    '//div[contains(@class, "bz_alias_short_desc_container")]',
    '//td[@id="bz_show_bug_column_1"]',
    '//table[@id="attachment_table"]',
    '//table[@class="bz_comment_table"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/bunicomic.com.php 0000664 0000000 0000000 00000000451 12656251722 0022103 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.bunicomic.com/comic/buni-623/',
'body' => array(
'//div[@class="comic-table"]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/buttersafe.com.php 0000664 0000000 0000000 00000000526 12656251722 0022302 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://buttersafe.com/2015/04/21/the-incredible-flexible-man/',
'body' => array(
'//div[@id="comic"]',
'//div[@class="post-comic"]',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/cad-comic.com.php 0000664 0000000 0000000 00000000431 12656251722 0021750 0 ustar 00root root 0000000 0000000 array(
'%/cad/.+%' => array(
'test_url' => 'http://www.cad-comic.com/cad/20150417',
'body' => array(
'//*[@id="content"]/img',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/chaoslife.findchaos.com.php 0000664 0000000 0000000 00000000373 12656251722 0024030 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://chaoslife.findchaos.com/pets-in-the-wild',
'body' => array('//div[@id="comic"]'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/cliquerefresh.com.php 0000664 0000000 0000000 00000000464 12656251722 0023000 0 ustar 00root root 0000000 0000000 array(
'%/comic.*%' => array(
'test_url' => 'http://cliquerefresh.com/comic/078-stating-the-obvious/',
'body' => array('//div[@class="comicImg"]/img | //div[@class="comicImg"]/a/img'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/cnet.com.php 0000664 0000000 0000000 00000002770 12656251722 0021072 0 ustar 00root root 0000000 0000000 array(
'%^/products.*%' => array(
'test_url' => 'http://www.cnet.com/products/fibaro-flood-sensor/#ftag=CADf328eec',
'body' => array(
    // XPath unions use a single "|"; both contains() calls need a quoted
    // substring and a closing parenthesis to form a valid expression.
    '//li[contains(@class,"slide first")] | //figure[contains(@class,"promoFigure")]',
    '//div[@class="quickInfo"]',
    '//div[@class="col-6 ratings"]',
    '//div[@id="editorReview"]',
),
'strip' => array(
'//script',
'//a[@class="clickToEnlarge"]',
'//div[@section="topSharebar"]',
'//div[contains(@class,"related")]',
'//div[contains(@class,"ad-")]',
'//div[@section="shortcodeGallery"]',
),
),
'%.*%' => array(
'test_url' => 'http://cnet.com.feedsportal.com/c/34938/f/645093/s/4a340866/sc/28/l/0L0Scnet0N0Cnews0Cman0Eclaims0Eonline0Epsychic0Emade0Ehim0Ebuy0E10Emillion0Epowerball0Ewinning0Eticket0C0Tftag0FCAD590Aa51e/story01.htm',
'body' => array(
'//p[@itemprop="description"]',
'//div[@itemprop="articleBody"]',
),
'strip' => array(
'//script',
'//a[@class="clickToEnlarge"]',
'//div[@section="topSharebar"]',
'//div[contains(@class,"related")]',
'//div[contains(@class,"ad-")]',
'//div[@section="shortcodeGallery"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/consomac.fr.php 0000664 0000000 0000000 00000000523 12656251722 0021566 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://consomac.fr/news-2430-l-iphone-6-toujours-un-secret-bien-garde.html',
'body' => array(
'//div[contains(@id, "newscontent")]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/cowbirdsinlove.com.php 0000664 0000000 0000000 00000000213 12656251722 0023160 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/csmonitor.com.php 0000664 0000000 0000000 00000001107 12656251722 0022147 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.csmonitor.com/USA/Politics/2015/0925/John-Boehner-steps-down-Self-sacrificing-but-will-it-lead-to-better-government',
'body' => array(
'//figure[@id="image-top-1"]',
'//div[@id="story-body"]',
),
'strip' => array(
'//script',
'//img[@title="hide caption"]',
'//*[contains(@class,"promo_link")]',
'//div[@id="story-embed-column"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/dailyjs.com.php 0000664 0000000 0000000 00000001077 12656251722 0021577 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://dailyjs.com/2014/08/07/p5js/',
'body' => array(
'//div[@id="post"]',
),
'strip' => array(
'//h2[@class="post"]',
'//div[@class="meta"]',
'//*[contains(@class, "addthis_toolbox")]',
'//*[contains(@class, "addthis_default_style")]',
'//*[@class="navigation small"]',
'//*[@id="related"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/dailyreporter.com.php 0000664 0000000 0000000 00000000732 12656251722 0023022 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://dailyreporter.com/2016/01/09/us-supreme-court-case-could-weaken-government-workers-unions/',
'body' => array(
'//div[contains(@class, "entry-content")]',
),
'strip' => array(
'//div[@class="dmcss_login_form"]',
'//*[contains(@class, "sharedaddy")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/dailytech.com.php 0000664 0000000 0000000 00000000574 12656251722 0022107 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.dailytech.com/Apples+First+Fixes+to+iOS+9+Land+w+iOS++901+Release/article37495.htm',
'body' => array(
'//div[@class="NewsBodyImage"]',
'//span[@id="lblSummary"]',
'//span[@id="lblBody"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/degroupnews.com.php 0000664 0000000 0000000 00000000626 12656251722 0022501 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.degroupnews.com/medias/vodsvod/amazon-concurrence-la-chromecast-de-google-avec-fire-tv-stick',
'body' => array(
'//div[@class="contenu"]',
),
'strip' => array(
'//div[contains(@class, "a2a")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/derstandard.at.php 0000664 0000000 0000000 00000000615 12656251722 0022256 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://derstandard.at/2000010267354/The-Witcher-3-Hohe-Hardware-Anforderungen-fuer-PC-Spieler?ref=rss',
'body' => array(
'//div[@class="copytext"]',
'//ul[@id="media-list"]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/dilbert.com.php 0000664 0000000 0000000 00000000404 12656251722 0021556 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array(
'//img[@class="img-responsive img-comic"]',
),
'test_url' => 'http://dilbert.com/strip/2016-01-28',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/discovermagazine.com.php 0000664 0000000 0000000 00000000761 12656251722 0023471 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://blogs.discovermagazine.com/the-extremo-files/2015/09/11/have-scientists-found-the-worlds-deepest-fish/',
'body' => array(
'//div[@class="entry"]',
),
'strip' => array(
'//h1',
'//div[@class="meta"]',
'//div[@class="shareIcons"]',
'//div[@class="navigation"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/distrowatch.com.php 0000664 0000000 0000000 00000000444 12656251722 0022470 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://distrowatch.com/?newsid=08355',
'body' => array(
'//td[@class="NewsText"][1]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/dozodomo.com.php 0000664 0000000 0000000 00000000636 12656251722 0021772 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://dozodomo.com/bento/2014/03/04/lart-des-maki-de-takayo-kiyota/',
'body' => array(
'//div[@class="joke"]',
'//div[@class="story-cover"]',
'//div[@class="story-content"]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/drawingboardcomic.com.php 0000664 0000000 0000000 00000000564 12656251722 0023616 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array('//img[@id="comicimage"]'),
'strip' => array(),
'test_url' => 'http://drawingboardcomic.com/index.php?comic=208',
),
),
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/endlessorigami.com.php 0000664 0000000 0000000 00000000172 12656251722 0023140 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%-150x150%' => '',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/engadget.com.php 0000664 0000000 0000000 00000000520 12656251722 0021706 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.engadget.com/2015/04/20/dark-matter-discovery/?ncid=rss_truncated',
'body' => array('//div[@id="page_body"]/div[@class="container@m-"]'),
'strip' => array('//aside[@role="banner"]'),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/escapistmagazine.com.php 0000664 0000000 0000000 00000006177 12656251722 0023475 0 ustar 00root root 0000000 0000000 array(
'%/articles/view/comicsandcosplay/comics/critical-miss.*%' => array(
'body' => array('//*[@class="body"]/span/img | //div[@class="folder_nav_links"]/following::p'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss/13776-Critical-Miss-on-Framerates?utm_source=rss&utm_medium=rss&utm_campaign=articles',
'strip' => array(),
),
'%/articles/view/comicsandcosplay/comics/namegame.*%' => array(
'body' => array('//*[@class="body"]/span/p/img[@height != "120"]'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/namegame/9759-Leaving-the-Nest?utm_source=rss&utm_medium=rss&utm_campaign=articles',
'strip' => array(),
),
'%/articles/view/comicsandcosplay/comics/stolen-pixels.*%' => array(
'body' => array('//*[@class="body"]/span/p[2]/img'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/stolen-pixels/8866-Stolen-Pixels-258-Where-the-Boys-Are?utm_source=rss&utm_medium=rss&utm_campaign=articles',
'strip' => array(),
),
'%/articles/view/comicsandcosplay/comics/bumhugparade.*%' => array(
'body' => array('//*[@class="body"]/span/p[2]/img'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/bumhugparade/8262-Bumhug-Parade-13?utm_source=rss&utm_medium=rss&utm_campaign=articles',
'strip' => array(),
),
'%/articles/view/comicsandcosplay.*/comics/escapistradiotheater%' => array(
'body' => array('//*[@class="body"]/span/p[2]/img'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/escapistradiotheater/8265-The-Escapist-Radio-Theater-13?utm_source=rss&utm_medium=rss&utm_campaign=articles',
'strip' => array(),
),
'%/articles/view/comicsandcosplay/comics/paused.*%' => array(
'body' => array('//*[@class="body"]/span/p[2]/img | //*[@class="body"]/span/div/img'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/paused/8263-Paused-16?utm_source=rss&utm_medium=rss&utm_campaign=articles',
'strip' => array(),
),
'%/articles/view/comicsandcosplay/comics/fraughtwithperil.*%' => array(
'body' => array('//*[@class="body"]'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/fraughtwithperil/12166-The-Escapist-Presents-Escapist-Comics-Critical-Miss-B-lyeh-Fhlop?utm_source=rss&utm_medium=rss&utm_campaign=articles',
'strip' => array(),
),
'%/articles/view/video-games/columns/.*%' => array(
'body' => array('//*[@id="article_content"]'),
'test_url' => 'http://www.escapistmagazine.com/articles/view/video-games/columns/experienced-points/13971-What-50-Shades-and-Batman-Have-in-Common.2',
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/espn.go.com.php 0000664 0000000 0000000 00000000411 12656251722 0021500 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://espn.go.com/nfl/story/_/id/13388208/jason-whitlock-chip-kelly-controversy',
'body' => array(
'//p',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/exocomics.com.php 0000664 0000000 0000000 00000000540 12656251722 0022123 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array('//a[@class="comic"]/img'),
'strip' => array(),
'test_url' => 'http://www.exocomics.com/379',
),
),
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/explosm.net.php 0000664 0000000 0000000 00000000441 12656251722 0021631 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://explosm.net/comics/3803/',
'body' => array(
'//div[@id="comic-container"]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/extrafabulouscomics.com.php 0000664 0000000 0000000 00000000173 12656251722 0024216 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%-150x150%' => '',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/fastcodesign.com.php 0000664 0000000 0000000 00000000553 12656251722 0022607 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.fastcodesign.com/3026548/exposure/peek-inside-the-worlds-forbidden-subway-tunnels',
'body' => array(
'//article[contains(@class, "body prose")]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/fastcoexist.com.php 0000664 0000000 0000000 00000000611 12656251722 0022465 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.fastcoexist.com/3026114/take-a-seat-on-this-gates-funded-future-toilet-that-will-change-how-we-think-about-poop',
'body' => array(
'//article[contains(@class, "body prose")]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/fastcompany.com.php 0000664 0000000 0000000 00000000554 12656251722 0022463 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.fastcompany.com/3026712/fast-feed/elon-musk-an-apple-tesla-merger-is-very-unlikely',
'body' => array(
'//article[contains(@class, "body prose")]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/ffworld.com.php 0000664 0000000 0000000 00000000461 12656251722 0021577 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.ffworld.com/?rub=news&page=voir&id=2709',
'body' => array(
'//div[@class="news_body"]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/foreignpolicy.com.php 0000664 0000000 0000000 00000001276 12656251722 0023012 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://foreignpolicy.com/2016/01/09/networking-giant-pulls-nsa-linked-code-exploited-by-hackers/',
'body' => array(
'//article',
),
'strip' => array(
'//div[@id="post-category"]',
'//div[@id="desktop-right"]',
'//h1',
'//section[@class="article-meta"]',
'//div[@class="side-panel-wrapper"]',
'//*[contains(@class, "share-")]',
'//*[contains(@id, "taboola-")]',
'//div[@class="comments"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/fossbytes.com.php 0000664 0000000 0000000 00000001443 12656251722 0022156 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://fossbytes.com/fbi-hacked-1000-computers-to-shut-down-largest-child-pornography-site-on-the-dark-web/',
'body' => array(
'//div[@class="entry-inner"]',
),
'strip' => array(
'//*[@class="at-above-post addthis_default_style addthis_toolbox at-wordpress-hide"]',
'//*[@class="at-below-post addthis_default_style addthis_toolbox at-wordpress-hide"]',
'//*[@class="at-below-post-recommended addthis_default_style addthis_toolbox at-wordpress-hide"]',
'//*[@class="code-block code-block-12 ai-desktop"]',
'//*[@class="code-block code-block-13 ai-tablet-phone"]',
),
),
),
); picoFeed-0.1.18/lib/PicoFeed/Rules/fowllanguagecomics.com.php 0000664 0000000 0000000 00000000431 12656251722 0024002 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array('//*[@id="comic"] | //*[@class="post-image"]'),
'strip' => array(),
'test_url' => 'http://www.fowllanguagecomics.com/comic/working-out/',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/geek.com.php 0000664 0000000 0000000 00000000674 12656251722 0021055 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.geek.com/news/the-11-best-ways-to-eat-eggs-1634076/',
'body' => array(
'//div[@class="articleinfo"]/figure',
'//div[@class="articleinfo"]/article',
'//span[@class="by"]',
),
'strip' => array(
'//span[@class="red"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/gerbilwithajetpack.com.php 0000664 0000000 0000000 00000000455 12656251722 0024002 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array(
'//div[@id="comic-1"]',
'//div[@class="entry"]',
),
'test_url' => 'http://gerbilwithajetpack.com/passing-the-digital-buck/',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/giantitp.com.php 0000664 0000000 0000000 00000000422 12656251722 0021750 0 ustar 00root root 0000000 0000000 array(
'%/comics/oots.*%' => array(
'test_url' => 'http://www.giantitp.com/comics/oots0989.html',
'body' => array(
'//td[@align="center"]/img',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/github.com.php 0000664 0000000 0000000 00000000530 12656251722 0021413 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'https://github.com/audreyr/favicon-cheat-sheet',
'body' => array(
'//article[contains(@class, "entry-content")]',
),
'strip' => array(
'//h1',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/gocomics.com.php 0000664 0000000 0000000 00000000443 12656251722 0021737 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.gocomics.com/pearlsbeforeswine/2015/05/30',
'body' => array(
'//div[1]/p[1]/a[1]/img',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/golem.de.php 0000664 0000000 0000000 00000000546 12656251722 0021055 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.golem.de/news/breko-telekom-verzoegert-gezielt-den-vectoring-ausbau-1311-102974.html',
'body' => array(
'//header[@class="cluster-header"]',
'//div[@class="formatted"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/greekculture.com.php 0000664 0000000 0000000 00000000421 12656251722 0022631 0 ustar 00root root 0000000 0000000 array(
'%/joyoftech/.*%' => array(
'body' => array(
'//img[@width="640"]',
),
'test_url' => 'http://www.geekculture.com/joyoftech/joyarchives/2235.html',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/happletea.com.php 0000664 0000000 0000000 00000000715 12656251722 0022101 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array(
'//div[@id="comic"]',
'//div[@class="entry"]',
),
'strip' => array('//div[@class="ssba"]'),
'test_url' => 'http://www.happletea.com/comic/mans-best-friend/',
),
),
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/heise.de.php 0000664 0000000 0000000 00000000575 12656251722 0021051 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.heise.de/security/meldung/BND-300-Millionen-Euro-fuer-Fruehwarnsystem-gegen-Cyber-Attacken-2192237.html',
'body' => array(
'//div[@class="meldung_wrapper"]',
'//div[@class="artikel_content"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/huffingtonpost.com.php 0000664 0000000 0000000 00000000531 12656251722 0023207 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
    'test_url' => 'http://www.huffingtonpost.com/2014/02/20/centscere-social-media-syracuse_n_4823848.html',
    // XPath for the content node. The predicate previously ended with
    // a stray ")" ('[@class="content")]'), which is not valid XPath.
    'body' => array(
        '//article[@class="content"]',
    ),
    // Nothing is stripped from the selected content.
    'strip' => array(
    ),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/imogenquest.net.php 0000664 0000000 0000000 00000000213 12656251722 0022477 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/ing.dk.php 0000664 0000000 0000000 00000000557 12656251722 0020537 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://ing.dk/artikel/smart-husisolering-og-styring-skal-mindske-japans-energikrise-164517',
'body' => array(
'//section[contains(@class, "teaser")]',
'//section[contains(@class, "body")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/invisiblebread.com.php 0000664 0000000 0000000 00000000322 12656251722 0023112 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%()%' => '$1
',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/ir.amd.com.php 0000664 0000000 0000000 00000000432 12656251722 0021304 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array('//span[@class="ccbnTxt"]'),
'strip' => array(),
'test_url' => 'http://ir.amd.com/phoenix.zhtml?c=74093&p=RssLanding&cat=news&id=2055819',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/japantimes.co.jp.php 0000664 0000000 0000000 00000001272 12656251722 0022523 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.japantimes.co.jp/news/2015/09/27/world/social-issues-world/pope-meets-sex-abuse-victims-philadelphia-promises-accountability/',
'body' => array(
'//article[@role="main"]',
),
'strip' => array(
'//script',
'//header',
'//div[contains(@class, "meta")]',
'//div[@class="clearfix"]',
'//div[@class="OUTBRAIN"]',
'//ul[@id="content_footer_menu"]',
'//div[@class="article_footer_ad"]',
'//div[@id="disqus_thread"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/japantoday.com.php 0000664 0000000 0000000 00000000631 12656251722 0022265 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.japantoday.com/category/politics/view/japan-u-s-to-sign-new-base-environment-pact',
'body' => array(
'//div[@id="article_container"]',
),
'strip' => array(
'//h2',
'//div[@id="article_info"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/journaldugeek.com.php 0000664 0000000 0000000 00000000444 12656251722 0022774 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
    // The host was missing from this URL ("http://www./..."); restored to
    // match the domain this rule file is named after.
    'test_url' => 'http://www.journaldugeek.com/2014/05/20/le-playstation-now-arrive-en-beta-fermee-aux-etats-unis/',
    // XPath for the content node.
    'body' => array(
        '//div[@class="post-content"]',
    ),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/jsonline.com.php 0000664 0000000 0000000 00000001460 12656251722 0021755 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.jsonline.com/news/usandworld/as-many-as-a-million-expected-for-popes-last-mass-in-us-b99585180z1-329688131.html',
'body' => array(
'//div[@id="mainContent"]',
),
'strip' => array(
'//script',
'//h1',
'//h4[@class="credit"]',
'//div[@class="columnist_container"]',
'//div[@class="storyTimestamp"]',
'//ul[@id="sharing-tools"]',
'//div[@class="title"]',
'//img[@class="floatLeft"]',
'//div[@class="first feature"]',
'//div[@class="collateral_article_content"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/kanpai.fr.php 0000664 0000000 0000000 00000000502 12656251722 0021224 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.kanpai.fr/japon/comment-donner-lheure-en-japonais.html',
'body' => array(
'//div[@class="single-left"]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/karriere.jobfinder.dk.php 0000664 0000000 0000000 00000000601 12656251722 0023515 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://karriere.jobfinder.dk/artikel/dansk-professor-skal-lede-smart-grid-forskning-20-millioner-dollars-763',
'body' => array(
'//section[contains(@class, "teaser")]',
'//section[contains(@class, "body")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/koreaherald.com.php 0000664 0000000 0000000 00000000406 12656251722 0022414 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.koreaherald.com/view.php?ud=20150926000018',
'body' => array(
'//div[@class="content_view"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/koreatimes.php 0000664 0000000 0000000 00000000612 12656251722 0021520 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.koreatimes.co.kr/www/news/nation/2015/12/116_192409.html',
'body' => array(
'//div[@id="p"]',
),
'strip' => array(
'//script',
'//div[@id="webtalks_btn_listenDiv"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/lastplacecomics.com.php 0000664 0000000 0000000 00000000173 12656251722 0023302 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%-150x150%' => '',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/lejapon.fr.php 0000664 0000000 0000000 00000001022 12656251722 0021407 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://lejapon.fr/guide-voyage-japon/5223/tokyo-sous-la-neige.htm',
'body' => array(
'//div[@class="entry"]',
),
'strip' => array(
'//*[contains(@class, "addthis_toolbox")]',
'//*[contains(@class, "addthis_default_style")]',
'//*[@class="navigation small"]',
'//*[@id="related"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/lesjoiesducode.fr.php 0000664 0000000 0000000 00000000543 12656251722 0022767 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://lesjoiesducode.fr/post/75576211207/quand-lappli-ne-fonctionne-plus-sans-aucune-raison',
'body' => array(
'//div[@class="blog-post-content"]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/lfg.co.php 0000664 0000000 0000000 00000000632 12656251722 0020527 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.lfg.co/page/871/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+LookingForGroup+%28Looking+For+Group%29&utm_content=FeedBurner',
'body' => array(
'//*[@id="comic"]/img | //*[@class="content"]',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/lifehacker.com.php 0000664 0000000 0000000 00000001064 12656251722 0022231 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
    'test_url' => 'http://lifehacker.com/bring-water-bottle-caps-into-concerts-to-protect-your-d-1269334973',
    // XPath expressions for the content nodes. The first expression
    // previously lacked the "]" closing its predicate, making it invalid.
    'body' => array(
        '//div[contains(@class, "row")]/img',
        '//div[contains(@class, "content-column")]',
    ),
    // XPath expressions listed under "strip" for this site.
    'strip' => array(
        '//*[contains(@class, "meta")]',
        '//span[contains(@class, "icon")]',
        '//h1',
        '//aside',
    ),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/linux.org.php 0000664 0000000 0000000 00000000537 12656251722 0021310 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.linux.org/threads/lua-the-scripting-interpreter.8352/',
'body' => array(
'//div[@class="messageContent"]',
),
'strip' => array(
'//aside',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/linuxinsider.com.php 0000664 0000000 0000000 00000001065 12656251722 0022652 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.linuxinsider.com/story/82526.html?rss=1',
'body' => array(
'//div[@id="story"]',
),
'strip' => array(
'//script',
'//h1',
'//div[@id="story-toolbox1"]',
'//div[@id="story-byline"]',
'//div[@id="story"]/p',
'//div[@class="story-advertisement"]',
'//iframe',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/lists.php 0000664 0000000 0000000 00000000471 12656251722 0020516 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://lists.freebsd.org/pipermail/freebsd-announce/2013-September/001504.html',
'body' => array(
'//pre',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/loadingartist.com.php 0000664 0000000 0000000 00000000172 12656251722 0022777 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%-150x150%' => '',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/loldwell.com.php 0000664 0000000 0000000 00000000362 12656251722 0021752 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://loldwell.com/?comic=food-math-101',
'body' => array('//*[@id="comic"]'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/lukesurl.com.php 0000664 0000000 0000000 00000000571 12656251722 0022004 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array('//div[@id="comic"]//img'),
'strip' => array(),
'test_url' => 'http://www.lukesurl.com/archives/comic/665-3-of-clubs',
),
),
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/macg.co.php 0000664 0000000 0000000 00000000541 12656251722 0020665 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.macg.co//logiciels/2014/05/feedly-sameliore-un-petit-peu-sur-mac-82205',
'body' => array(
'//div[contains(@class, "field-name-body")]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/marc.info.php 0000664 0000000 0000000 00000000441 12656251722 0021231 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://marc.info/?l=openbsd-misc&m=141987113202061&w=2',
'body' => array(
'//pre',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/marriedtothesea.com.php 0000664 0000000 0000000 00000000440 12656251722 0023311 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.marriedtothesea.com/index.php?date=052915',
'body' => array(
'//div[@align]/a/img',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/marycagle.com.php 0000664 0000000 0000000 00000000527 12656251722 0022103 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array(
'//img[@id="cc-comic"]',
'//div[@class="cc-newsbody"]',
),
'strip' => array(),
'test_url' => 'http://www.marycagle.com/letsspeakenglish/74-grim-reality/',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/maximumble.thebookofbiff.com.php 0000664 0000000 0000000 00000000421 12656251722 0025076 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://maximumble.thebookofbiff.com/2015/04/20/1084-change/',
'body' => array('//div[@id="comic"]/div/a/img'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/medium.com.php 0000664 0000000 0000000 00000000477 12656251722 0021423 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'https://medium.com/lessons-learned/917b8b63ae3e',
'body' => array(
'//div[contains(@class, "post-field body")]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/mercworks.net.php 0000664 0000000 0000000 00000000721 12656251722 0022157 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array('//div[@id="comic"]',
'//div[contains(@class,"entry-content")]',
),
'strip' => array(),
'test_url' => 'http://mercworks.net/comicland/healthy-choice/',
),
),
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/metronieuws.nl.php 0000664 0000000 0000000 00000000617 12656251722 0022353 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.metronieuws.nl/sport/2015/04/broer-fellaini-zorgde-bijna-voor-paniek-bij-mourinho',
'body' => array('//div[contains(@class,"article-top")]/div[contains(@class,"image-component")] | //div[@class="article-full-width"]/div[1]'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/milwaukeenns.php 0000664 0000000 0000000 00000000630 12656251722 0022057 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://milwaukeenns.org/2016/01/08/united-way-grant-enables-sdc-to-restore-free-tax-assistance-program/',
'body' => array(
'//div[@class="pf-content"]',
),
'strip' => array(
'//div[@class="printfriendly"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/mlb.com.php 0000664 0000000 0000000 00000001034 12656251722 0020703 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://m.brewers.mlb.com/news/article/161364798',
'body' => array(
'//article',
),
'strip' => array(
'//div[@class="article-top"]',
'//div[contains(@class, "contributor-bottom")]',
'//p[@class="tagline"]',
'//div[contains(@class, "social-")]',
'//div[@class="button-wrap"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/mokepon.smackjeeves.com.php 0000664 0000000 0000000 00000000475 12656251722 0024110 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://mokepon.smackjeeves.com/comics/2120096/chapter-9-page-68/',
'body' => array('//*[@id="comic_area_inner"]/img | //*[@id="comic_area_inner"]/a/img'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/monwindowsphone.com.php 0000664 0000000 0000000 00000000546 12656251722 0023376 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.monwindowsphone.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html',
'body' => array(
'//div[@class="blog-post-body"]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/mrlovenstein.com.php 0000664 0000000 0000000 00000000263 12656251722 0022661 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%alt="(.+)" */>%' => '/>
$1',
'%\.png%' => '_rollover.png',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/muckrock.com.php 0000664 0000000 0000000 00000000653 12656251722 0021755 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'https://www.muckrock.com/news/archives/2016/jan/13/5-concerns-private-prisons/',
'body' => array(
'//div[@class="content"]',
),
'strip' => array(
'//h1',
'//div[@class="secondary"]',
'//aside',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/nationaljournal.com.php 0000664 0000000 0000000 00000000727 12656251722 0023341 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.nationaljournal.com/s/354962/south-carolina-evangelicals-outstrip-establishment?mref=home_top_main',
'body' => array(
'//div[@class="section-body"]',
),
'strip' => array(
'//*[contains(@class, "-related")]',
'//*[contains(@class, "social")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/nature.com.php 0000664 0000000 0000000 00000000470 12656251722 0021432 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.nature.com/doifinder/10.1038/nature.2015.18340',
'body' => array(
'//div[contains(@class,"main-content")]',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/nba.com.php 0000664 0000000 0000000 00000000702 12656251722 0020672 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.nba.com/2015/news/09/25/knicks-jackson-to-spend-more-time-around-coaching-staff.ap/index.html?rss=true',
'body' => array(
'//section[@id="nbaArticleContent"]',
),
'strip' => array(
'//div[@id="nbaArticleSocialWrapper_bot"]',
'//h5',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/nedroid.com.php 0000664 0000000 0000000 00000000213 12656251722 0021553 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/networkworld.com.php 0000664 0000000 0000000 00000001242 12656251722 0022673 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.networkworld.com/article/3020585/security/the-incident-response-fab-five.html',
'body' => array(
'//figure/img[@class="hero-img"]',
'//section[@class="deck"]',
'//div[@itemprop="articleBody"] | //div[@itemprop="reviewBody"]',
'//div[@class="carousel-inside-crop"]',
),
'strip' => array(
'//script',
'//aside',
'//div[@class="credit"]',
'//div[@class="view-large"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/neustadt-ticker.de.php 0000664 0000000 0000000 00000000676 12656251722 0023064 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.neustadt-ticker.de/41302/alltag/kultur/demo-auf-der-boehmischen',
'body' => array(
'//div[@class="entry-content"]',
),
'strip' => array(
'//*[contains(@class, "sharedaddy")]',
'//*[contains(@class, "yarpp-related")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/news.sciencemag.org.php 0000664 0000000 0000000 00000001066 12656251722 0023220 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://news.sciencemag.org/biology/2015/09/genetic-engineering-turns-common-plant-cancer-fighter',
'body' => array(
'//div[@class="content"]',
),
'strip' => array(
'//h1[@class="snews-article__headline"]',
'//div[contains(@class,"easy_social_box")]',
'//div[@class="author-teaser"]',
'//div[@class="article-byline"]',
),
),
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/niceteethcomic.com.php 0000664 0000000 0000000 00000000413 12656251722 0023114 0 ustar 00root root 0000000 0000000 array(
'%/archives.*%' => array(
'test_url' => 'http://niceteethcomic.com/archives/page119/',
'body' => array('//*[@class="comicpane"]/a/img'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/nichtlustig.de.php 0000664 0000000 0000000 00000000334 12656251722 0022302 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%.*static.nichtlustig.de/comics/full/(\\d+).*%s' => '
',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/oglaf.com.php 0000664 0000000 0000000 00000000740 12656251722 0021224 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array(
'//img[@id="strip"]',
'//a/div[@id="nx"]/..',
),
'strip' => array(),
'test_url' => 'http://oglaf.com/slodging/',
),
),
'filter' => array(
'%.*%' => array(
'%alt="(.+)" title="(.+)" */>%' => '/>
$1
$2
',
'%%' => 'Next page',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/onhax.net.php 0000664 0000000 0000000 00000000650 12656251722 0021261 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://onhax.net/process-lasso-8-9-1-4-pro-key-portable-is-here-latest',
'body' => array(
'//div[@class="postcontent"]',
),
'strip' => array(
'//*[@class="sharedaddy sd-sharing-enabled"]',
'//*[@class="yarpp-related"]',
),
),
),
); picoFeed-0.1.18/lib/PicoFeed/Rules/onmilwaukee.php 0000664 0000000 0000000 00000001451 12656251722 0021677 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://onmilwaukee.com/movies/articles/downerspelunking.html',
'body' => array(
'//article[contains(@class, "show")]',
),
'strip' => array(
'//h1',
'//div[contains(@class, "-ad")]',
'//div[contains(@class, "_ad")]',
'//div[@id="pub_wrapper"]',
'//div[contains(@class, "share_tools")]',
'//div[@class="clearfix"]',
'//div[contains(@class,"image_control")]',
'//section[@class="ribboned"]',
'//div[contains(@class,"sidebar")]',
'//aside[@class="article_tag_list"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/openrightsgroup.org.php 0000664 0000000 0000000 00000001162 12656251722 0023403 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'https://www.openrightsgroup.org/blog/2014/3-days-to-go-till-orgcon2014',
'body' => array(
'//div[contains(@class, "content")]/div',
),
'strip' => array(
'//h2[1]',
'//div[@class="info"]',
'//div[@class="tags"]',
'//div[@class="comments"]',
'//div[@class="breadcrumbs"]',
'//h1[@class="pageTitle"]',
'//p[@class="bookmarkThis"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/opensource.com.php 0000664 0000000 0000000 00000000544 12656251722 0022320 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
    'test_url' => 'http://opensource.com/life/15/10/how-internet-things-will-change-way-we-think',
    // XPath expressions for the content nodes. contains() takes two
    // arguments (haystack, needle); the second expression previously used
    // "=" instead of "," and therefore passed a single boolean argument.
    'body' => array(
        '//img[@class="image-full-size"]',
        '//div[contains(@class, "field-type-text-with-summary")]',
    ),
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/optipess.com.php 0000664 0000000 0000000 00000000213 12656251722 0021775 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/osnews.com.php 0000664 0000000 0000000 00000000451 12656251722 0021451 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://osnews.com/story/28863/Google_said_to_be_under_US_antitrust_scrutiny_over_Android',
'body' => array(
'//div[@class="newscontent1"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/pastebin.com.php 0000664 0000000 0000000 00000000426 12656251722 0021742 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://pastebin.com/ed1pP9Ak',
'body' => array(
'//div[@class="text"]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/peebleslab.com.php 0000664 0000000 0000000 00000000322 12656251722 0022226 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
// the extra space is required to strip the title cleanly
'%title="(.+) " */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/penny-arcade.com.php 0000664 0000000 0000000 00000001066 12656251722 0022504 0 ustar 00root root 0000000 0000000 array(
'%/news/.*%' => array(
'test_url' => 'http://penny-arcade.com/news/post/2015/04/15/101-part-two',
'body' => array(
'//*[@class="postBody"]/*',
),
'strip' => array(
),
),
'%/comic/.*%' => array(
'test_url' => 'http://penny-arcade.com/comic/2015/04/15',
'body' => array(
'//*[@id="comicFrame"]/a/img',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/pixelbeat.org.php 0000664 0000000 0000000 00000000471 12656251722 0022123 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.pixelbeat.org/programming/sigpipe_handling.html#1425573246',
'body' => array(
'//div[@class="contentText"]',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/plus.google.com.php 0000664 0000000 0000000 00000000437 12656251722 0022375 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'https://plus.google.com/+LarryPage/posts/Lh8SKC6sED1',
'body' => array(
'//div[@role="article"]/div[contains(@class, "eE")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/popstrip.com.php 0000664 0000000 0000000 00000000231 12656251722 0022007 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%( '$1$2$1bonus.png"/>',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/putaindecode.fr.php 0000664 0000000 0000000 00000000720 12656251722 0022427 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://putaindecode.fr/posts/js/etat-lieux-js-modulaire-front/',
'body' => array(
'//*[@class="putainde-Post-md"]',
),
'strip' => array(
'//*[contains(@class, "inlineimg")]',
'//*[contains(@class, "comment-respond")]',
'//header',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/recode.net.php 0000664 0000000 0000000 00000001274 12656251722 0021410 0 ustar 00root root 0000000 0000000 array(
// Rule entry whose URL pattern `%.*%` matches any path on this site.
'%.*%' => array(
    'test_url' => 'http://recode.net/2015/09/26/big-tech-rolls-out-red-carpet-for-indian-prime-minister-lobbies-behind-closed-doors/',
    // XPath 1.0 expressions (presumably the nodes kept as article content).
    'body' => array(
        '//img[contains(@class,"attachment-large")]',
        '//div[contains(@class,"postarea")]',
        // Was `//li[@class,"author"]`, which is not a valid predicate: the
        // comma-separated form only makes sense as arguments to contains(),
        // matching the other expressions in this rule.
        '//li[contains(@class,"author")]',
    ),
    // XPath 1.0 expressions (presumably the nodes removed from the content).
    'strip' => array(
        '//script',
        '//div[contains(@class,"sharedaddy")]',
        '//div[@class="post-send-off"]',
        '//div[@class="large-12 columns"]',
        '//div[contains(@class,"inner-related-article")]',
    ),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/retractionwatch.com.php 0000664 0000000 0000000 00000001037 12656251722 0023335 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://retractionwatch.com/2015/11/12/psychologist-jens-forster-settles-case-by-agreeing-to-2-retractions/',
'body' => array(
'//*[@class="main"]',
'//*[@class="entry-content"]',
),
'strip' => array(
'//*[contains(@class, "sharedaddy")]',
'//*[contains(@class, "jp-relatedposts")]',
'//p[@class="p1"]',
)
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/rue89.nouvelobs.com.php 0000664 0000000 0000000 00000000724 12656251722 0023125 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://rue89.feedsportal.com/c/33822/f/608948/s/30999fa0/sc/24/l/0L0Srue890N0C20A130C0A80C30A0Cfaisait0Eboris0Eboillon0Eex0Esarko0Eboy0E350A0E0A0A0A0Eeuros0Egare0Enord0E245315/story01.htm',
'body' => array(
'//*[@id="article"]/div[contains(@class, "content")]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/satwcomic.com.php 0000664 0000000 0000000 00000000527 12656251722 0022130 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://satwcomic.com/day-at-the-beach',
'body' => array(
'//div[@class="container"]/center/a/img',
'//span[@itemprop="articleBody"]',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/scrumalliance.org.php 0000664 0000000 0000000 00000000551 12656251722 0022767 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'https://www.scrumalliance.org/community/articles/2015/march/an-introduction-to-agile-project-intake?feed=articles',
'body' => array(
'//div[@class="article_content"]',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/securityfocus.com.php 0000664 0000000 0000000 00000000400 12656251722 0023034 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.securityfocus.com/news/11569?ref=rss',
'body' => array(
'//div[@class="expanded"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/sentfromthemoon.com.php 0000664 0000000 0000000 00000000674 12656251722 0023371 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array(
'//div[@class="comicpane"]/a/img',
'//div[@class="entry"]',
),
'strip' => array(),
'test_url' => 'http://sentfromthemoon.com/archives/1417',
),
),
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/sitepoint.com.php 0000664 0000000 0000000 00000000476 12656251722 0022160 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.sitepoint.com/creating-hello-world-app-swift/',
'body' => array(
'//section[@class="article_body"]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/slashdot.org.php 0000664 0000000 0000000 00000000621 12656251722 0021764 0 ustar 00root root 0000000 0000000 array(
// Rule entry whose URL pattern `%.*%` matches any path on this site.
'%.*%' => array(
    'test_url' => 'http://science.slashdot.org/story/15/04/20/0528253/pull-top-can-tabs-at-50-reach-historic-archaeological-status',
    // A single XPath union covering two alternative article layouts.
    'body' => array(
        '//article/div[@class="body"] | //article[@class="layout-article"]/div[@class="elips"]',
    ),
    // No strip expressions are defined for this site.
    'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/smallhousebliss.com.php 0000664 0000000 0000000 00000001153 12656251722 0023344 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://smallhousebliss.com/2013/08/29/house-g-by-lode-architecture/',
'body' => array(
'//div[@class="post-content"]',
),
'strip' => array(
'//*[contains(@class, "gallery")]',
'//*[contains(@class, "share")]',
'//*[contains(@class, "wpcnt")]',
'//*[contains(@class, "meta")]',
'//*[contains(@class, "postitle")]',
'//*[@id="nav-below"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/smarthomewelt.de.php 0000664 0000000 0000000 00000000520 12656251722 0022635 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://smarthomewelt.de/apple-tv-amazon-echo-smart-home/',
'body' => array('//div[@class="entry-inner"]/p | //div[@class="entry-inner"]/div[contains(@class,"wp-caption")]'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/smashingmagazine.com.php 0000664 0000000 0000000 00000000473 12656251722 0023464 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.smashingmagazine.com/2015/04/17/using-sketch-for-responsive-web-design-case-study/',
'body' => array('//article[contains(@class,"post")]/p'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/smbc-comics.com.php 0000664 0000000 0000000 00000000221 12656251722 0022325 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%()%' => '$1$2$1after$2',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/soundandvision.com.php 0000664 0000000 0000000 00000001172 12656251722 0023177 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.soundandvision.com/content/james-guthrie-mixing-roger-waters-and-pink-floyd-51',
'body' => array(
'//div[@id="left"]',
),
'strip' => array(
'//div[@class="meta"]',
'//div[@class="ratingsbox"]',
'//h1',
'//h2',
'//addthis',
'//comment-links',
'//div[@class="book-navigation"]',
'//div[@class="comment-links"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/spiegel.de.php 0000664 0000000 0000000 00000000520 12656251722 0021372 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.spiegel.de/politik/ausland/afrika-angola-geht-gegen-islam-vor-und-schliesst-moscheen-a-935788.html',
'body' => array(
'//div[contains(@class, "article-section")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/stereophile.com.php 0000664 0000000 0000000 00000000453 12656251722 0022460 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.stereophile.com/content/2015-rocky-mountain-audio-fest-starts-friday',
'body' => array(
'//div[@class="content clear-block"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/stupidfox.net.php 0000664 0000000 0000000 00000000520 12656251722 0022165 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://stupidfox.net/134-sleepy-time',
'body' => array(
'//div[@class="comicmid"]/center/a/img',
'//div[@class="stand_high"]',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/subtraction.com.php 0000664 0000000 0000000 00000000625 12656251722 0022473 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.subtraction.com/2015/06/06/time-lapse-video-of-one-world-trade-center/',
'body' => array('//article/div[@class="entry-content"]'),
'strip' => array(),
),
),
'filter' => array(
'%.*%' => array(
'%\+%' => '',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/sz.de.php 0000664 0000000 0000000 00000000623 12656251722 0020402 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://sz.de/1.2443161',
'body' => array('//article[@id="sitecontent"]/section[@class="topenrichment"]//img | //article[@id="sitecontent"]/section[@class="body"]/section[@class="authors"]/preceding-sibling::*[not(contains(@class, "ad"))]'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/techcrunch.com.php 0000664 0000000 0000000 00000000655 12656251722 0022267 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://techcrunch.com/2013/08/31/indias-visa-maze/',
'body' => array(
'//div[contains(@class, "media-container")]',
'//div[@class="body-copy"]',
),
'strip' => array(
'//*[contains(@class, "module-crunchbase")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/the-ebook-reader.com.php 0000664 0000000 0000000 00000000462 12656251722 0023252 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://blog.the-ebook-reader.com/2015/09/25/kobo-glo-hd-and-kobo-touch-2-0-covers-and-cases-roundup/',
'body' => array(
'//div[@class="entry"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/theatlantic.com.php 0000664 0000000 0000000 00000001445 12656251722 0022437 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.theatlantic.com/politics/archive/2015/09/what-does-it-mean-to-lament-the-poor-inside-panem/407317/',
'body' => array(
'//picture[@class="img"]',
'//figure/figcaption/span',
'//div/p[@itemprop="description"]',
'//div[@class="article-body"]',
),
'strip' => array(
'//aside[@class="callout"]',
'//span[@class="credit"]',
'//figcaption[@class="credit"]',
'//aside[contains(@class,"partner-box")]',
'//div[contains(@class,"ad")]',
'//a[contains(@class,"social-icon")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/theawkwardyeti.com.php 0000664 0000000 0000000 00000000173 12656251722 0023170 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%-150x150%' => '',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/thecodinglove.com.php 0000664 0000000 0000000 00000000374 12656251722 0022771 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://thecodinglove.com/post/116897934767',
'body' => array('//div[@class="bodytype"]'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/thedoghousediaries.com.php 0000664 0000000 0000000 00000000666 12656251722 0024022 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array(
'//div[@class="comicpane"]/a/img',
'//div[@class="entry"]',
),
'strip' => array(),
'test_url' => 'http://thedoghousediaries.com/6023',
),
),
'filter' => array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/thegamercat.com.php 0000664 0000000 0000000 00000000452 12656251722 0022420 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.thegamercat.com/comic/just-no/',
'body' => array('//div[@id="comic"] | //div[@class="post-content"]/div[@class="entry"]/p'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/thehindu.com.php 0000664 0000000 0000000 00000001272 12656251722 0021745 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.thehindu.com/sci-tech/science/why-is-the-shape-of-cells-in-a-honeycomb-always-hexagonal/article7692306.ece?utm_source=RSS_Feed&utm_medium=RSS&utm_campaign=RSS_Syndication',
'body' => array(
'//div/img[@class="main-image"]',
'//div[@class="photo-caption"]',
'//div[@class="articleLead"]',
'//p',
'//span[@class="upper"]',
),
'strip' => array(
'//div[@id="articleKeywords"]',
'//div[@class="photo-source"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/thelocal.se.php 0000664 0000000 0000000 00000001230 12656251722 0021553 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.thelocal.se/20151018/swedish-moderates-tighten-focus-on-begging-ban',
'body' => array(
'//article',
),
'strip' => array(
'//p[@id="mobile-signature"]',
'//article/div[4]',
'//article/ul[1]',
'//div[@class="clr"]',
'//p[@class="small"]',
'//p[@style="font-weight: bold; font-size: 14px;"]',
'//div[@class="author"]',
'//div[@class="ad_container"]',
)
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/themerepublic.net.php 0000664 0000000 0000000 00000000605 12656251722 0022774 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.themerepublic.net/2015/04/david-lopez-pitoko.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+blogspot%2FDngUJ+%28Theme+Republic%29&utm_content=FeedBurner',
'body' => array('//*[@class="post-body"]'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/themoscowtimes.com.php 0000664 0000000 0000000 00000001047 12656251722 0023207 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.themoscowtimes.com/business/article/535500.html',
'body' => array(
'//div[@class="article_main_img"]',
'//div[@class="article_text"]',
),
'strip' => array(
'//div[@class="articlebottom"]',
'//p/b',
'//p/a[contains(@href, "/article.php?id=")]',
'//div[@class="disqus_wrap"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/thenewslens.com.php 0000664 0000000 0000000 00000002006 12656251722 0022470 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://international.thenewslens.com/post/255032/',
'body' => array(
'//article/main[contains(@class, "content-post")]',
),
'strip' => array(
'//div[@class="photo-credit"]',
'//p[@align="center"]',
'//div[@class="clearfix"]',
'//div[@class="authorZone"]',
'//style',
'//div[@id="ttsbox"]',
'//div[@id="ttscontrols"]',
'//div[@class="author-info"]',
'//div[contains(@id, "-ad")]',
'//div[@style="font-size:small;margin:3px 0 0 0;vertical-align:top;line-height:24px;"]',
'//div[contains(@class, "hidden-xs")]',
'//div[contains(@class, "visible-xs")]',
'//div[contains(@class, "visible-lg")]',
'//a[@name="comment-panel"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/theodd1sout.com.php 0000664 0000000 0000000 00000000172 12656251722 0022376 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%-150x150%' => '',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/theonion.com.php 0000664 0000000 0000000 00000000634 12656251722 0021761 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.theonion.com/article/wild-eyed-jim-harbaugh-informs-players-they-must-k-51397?utm_medium=RSS&utm_campaign=feeds',
'body' => array(
'//div[@class="content-masthead"]/figure/div/noscript/img',
'//div[@class="content-text"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/thestandard.com.hk.php 0000664 0000000 0000000 00000001157 12656251722 0023041 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.thestandard.com.hk/breaking_news_detail.asp?id=67156',
'body' => array(
'//table/tr/td/span[@class="bodyCopy"]',
),
'strip' => array(
'//script',
'//br',
'//map[@name="gif_bar"]',
'//img[contains(@usemap,"gif_bar")]',
'//a',
'//span[@class="bodyHeadline"]',
'//i',
'//b',
'//table',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/threepanelsoul.com.php 0000664 0000000 0000000 00000000374 12656251722 0023171 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'body' => array(
'//img[@id="cc-comic"]',
),
'test_url' => 'http://www.threepanelsoul.com/comic/uncloaking',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/timesofindia.indiatimes.com.php 0000664 0000000 0000000 00000000723 12656251722 0024735 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://timesofindia.indiatimes.com/city/mangaluru/Adani-UPCL-to-release-CSR-grant-of-Rs-3-74-crore-to-YellurGram-Panchayat/articleshow/50512116.cms',
'body' => array(
'//div[@class="article_content clearfix"]',
'//section[@class="highlight clearfix"]',
),
'strip' => array(
),
),
),
); picoFeed-0.1.18/lib/PicoFeed/Rules/travel-dealz.de.php 0000664 0000000 0000000 00000000702 12656251722 0022336 0 ustar 00root root 0000000 0000000 array(
'%^/blog.*%' => array(
'test_url' => 'http://travel-dealz.de/blog/venere-gutschein/',
'body' => array('//div[@class="post-entry"]'),
'strip' => array(
'//*[@id="jp-relatedposts"]',
'//*[@class="post-meta"]',
'//*[@class="post-data"]',
'//*[@id="author-meta"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/treehugger.com.php 0000664 0000000 0000000 00000000613 12656251722 0022274 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.treehugger.com/uncategorized/top-ten-posts-week-bunnies-2.html',
'body' => array(
'//div[contains(@class, "promo-image")]',
'//div[contains(@id, "entry-body")]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/treelobsters.com.php 0000664 0000000 0000000 00000000213 12656251722 0022644 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%title="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/twogag.com.php 0000664 0000000 0000000 00000000303 12656251722 0021417 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%' => 'http://www.twogag.com/comics/$1.jpg',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/twokinds.keenspot.com.php 0000664 0000000 0000000 00000000523 12656251722 0023624 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://twokinds.keenspot.com/archive.php?p=0',
'body' => array('//*[@class="comic"]/div/a/img | //*[@class="comic"]/div/img | //*[@id="cg_img"]/img | //*[@id="cg_img"]/a/img'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/undeadly.org.php 0000664 0000000 0000000 00000000555 12656251722 0021756 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://undeadly.org/cgi?action=article&sid=20141101181155',
'body' => array(
'/html/body/table[3]/tbody/tr/td[1]/table[2]/tr/td[1]',
),
'strip' => array(
'//font',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/upi.com.php 0000664 0000000 0000000 00000000744 12656251722 0020735 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.upi.com/Top_News/US/2015/09/26/Tech-giants-Hollywood-stars-among-guests-at-state-dinner-for-Chinas-Xi-Jinping/4541443281006/',
'body' => array(
'//div[@class="img"]',
'//div/article[@itemprop="articleBody"]',
),
'strip' => array(
'//div[@align="center"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/version2.dk.php 0000664 0000000 0000000 00000000562 12656251722 0021525 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.version2.dk/artikel/surface-pro-2-fungerer-bedre-til-arbejde-end-fornoejelse-55195',
'body' => array(
'//section[contains(@class, "teaser")]',
'//section[contains(@class, "body")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/vgcats.com.php 0000664 0000000 0000000 00000000716 12656251722 0021426 0 ustar 00root root 0000000 0000000 array(
'%/comics.*%' => array(
'test_url' => 'http://www.vgcats.com/comics/?strip_id=358',
'body' => array('//*[@align="center"]/img'),
'strip' => array(),
),
'%/super.*%' => array(
'test_url' => 'http://www.vgcats.com/super/?strip_id=84',
'body' => array('//*[@align="center"]/p/img'),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/vuxml.org.php 0000664 0000000 0000000 00000000666 12656251722 0021327 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.vuxml.org/freebsd/a5f160fa-deee-11e4-99f8-080027ef73ec.html',
'body' => array(
'//body',
),
'strip' => array(
'//h1',
'//div[@class="blurb"]',
'//hr',
'//p[@class="copyright"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.bbc.co.uk.php 0000664 0000000 0000000 00000002545 12656251722 0021753 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833',
'body' => array(
'//div[@class="story-body__inner"] | //div[@class="article"]',
'//div[@class="indPost"]',
),
'strip' => array(
'//form',
'//div[@id="headline"]',
'//*[@class="warning"]',
'//span[@class="off-screen"]',
'//span[@class="story-image-copyright"]',
'//ul[@class="story-body__unordered-list"]',
'//div[@class="ad_wrapper"]',
'//div[@id="article-sidebar"]',
'//div[@class="data-table-outer"]',
'//*[@class="story-date"]',
'//*[@class="story-header"]',
'//figure[contains(@class,"has-caption")]',
'//*[@class="story-related"]',
'//*[contains(@class, "byline")]',
'//p[contains(@class, "media-message")]',
'//*[contains(@class, "story-feature")]',
'//*[@id="video-carousel-container"]',
'//*[@id="also-related-links"]',
'//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.bdgest.com.php 0000664 0000000 0000000 00000000666 12656251722 0022236 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.bdgest.com/chronique-6027-BD-Adrastee-Tome-2.html',
'body' => array(
'//*[contains(@class, "chronique")]',
),
'strip' => array(
'//*[contains(@class, "post-review")]',
'//*[contains(@class, "footer-review")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.bgr.in.php 0000664 0000000 0000000 00000001502 12656251722 0021356 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.bgr.in/news/xiaomi-redmi-3-with-13-megapixel-camera-snapdragon-616-launched-price-specifications-and-features/',
'body' => array(
'//div[@class="article-content"]',
),
'strip' => array(
'//*[@class="article-meta"]',
'//*[@class="contentAdsense300"]',
'//*[@class="iwpl-social-hide"]',
'//iframe[@class="iframeads"]',
'//*[@class="disqus_thread"]',
'//*[@class="outb-mobile OUTBRAIN"]',
'//*[@class="wdt_smart_alerts"]',
'//*[@class="footnote"]',
'//*[@id="gadget-widget"]',
'//header[@class="article-title entry-header"]',
),
),
),
); picoFeed-0.1.18/lib/PicoFeed/Rules/www.businessweek.com.php 0000664 0000000 0000000 00000000721 12656251722 0023465 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.businessweek.com/articles/2013-09-18/elon-musks-hyperloop-will-work-says-some-very-smart-software',
'body' => array(
'//div[@id="lead_graphic"]',
'//div[@id="article_body"]',
),
'strip' => array(
'//*[contains(@class, "related_item")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.cnn.com.php 0000664 0000000 0000000 00000002042 12656251722 0021532 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.cnn.com/2013/08/31/world/meast/syria-civil-war/index.html?hpt=hp_t1',
'body' => array(
'//div[@class="cnn_strycntntlft"]',
),
'strip' => array(
'//div[@class="cnn_stryshrwdgtbtm"]',
'//div[@class="cnn_strybtmcntnt"]',
'//div[@class="cnn_strylftcntnt"]',
'//div[contains(@class, "cnnGalleryContainer")]',
'//div[contains(@class, "cnn_strylftcexpbx")]',
'//div[contains(@class, "articleGalleryNavContainer")]',
'//div[contains(@class, "cnnArticleGalleryCaptionControl")]',
'//div[contains(@class, "cnnArticleGalleryNavPrevNextDisabled")]',
'//div[contains(@class, "cnnArticleGalleryNavPrevNext")]',
'//div[contains(@class, "cnn_html_media_title_new")]',
'//div[contains(@id, "disqus")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.developpez.com.php 0000664 0000000 0000000 00000001410 12656251722 0023127 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.developpez.com/actu/81757/Mozilla-annonce-la-disponibilite-de-Firefox-36-qui-passe-au-HTTP-2-et-permet-la-synchronisation-de-son-ecran-d-accueil/',
'body' => array(
'//*[@itemprop="articleBody"]',
),
'strip' => array(
'//form',
'//div[@class="content"]/img',
'//a[last()]/following-sibling::*',
'//*[contains(@class,"actuTitle")]',
'//*[contains(@class,"date")]',
'//*[contains(@class,"inlineimg")]',
'//*[@id="signaler"]',
'//*[@id="signalerFrame"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.egscomics.com.php 0000664 0000000 0000000 00000000440 12656251722 0022730 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.egscomics.com/index.php?id=1690',
'title' => '/html/head/title',
'body' => array(
'//img[@id="comic"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.fakingnews.firstpost.com.php 0000664 0000000 0000000 00000001102 12656251722 0025140 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.fakingnews.firstpost.com/2016/01/engineering-student-creates-record-in-a-decade-becomes-the-first-to-completely-exhaust-ball-pen-refill/',
'body' => array(
'//div[@class="entry"]',
),
'strip' => array(
'//*[@class="socialshare_bar"]',
'//*[@class="authorbox"]',
'//*[@class="cf5_rps"]',
'//*[@class="60563 fb-comments fb-social-plugin"]',
),
),
),
); picoFeed-0.1.18/lib/PicoFeed/Rules/www.forbes.com.php 0000664 0000000 0000000 00000001441 12656251722 0022236 0 ustar 00root root 0000000 0000000 array(
// Rule entry whose URL pattern `%.*%` matches any path on this site.
'%.*%' => array(
    'test_url' => 'http://www.forbes.com/sites/andygreenberg/2013/09/05/follow-the-bitcoins-how-we-got-busted-buying-drugs-on-silk-roads-black-market/',
    // XPath 1.0 expressions (presumably the nodes kept as article content).
    'body' => array(
        // "body" must be a quoted string literal. Unquoted, `body` is a
        // child-element node test; with no such child it evaluates to the
        // empty string and contains() is then true for every div.
        '//div[@id="leftRail"]/div[contains(@class, "body")]',
    ),
    // XPath 1.0 expressions (presumably the nodes removed from the content).
    'strip' => array(
        '//aside',
        '//div[contains(@class, "entity_block")]',
        // not() is a function in XPath 1.0, so its argument needs
        // parentheses; `not contains(...)` is a syntax error.
        '//div[contains(@class, "vestpocket") and not(contains(@class, "body"))]',
        '//div[contains(@style, "display")]',
        '//div[contains(@id, "comment")]',
        '//div[contains(@class, "widget")]',
        '//div[contains(@class, "pagination")]',
    ),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.franceculture.fr.php 0000664 0000000 0000000 00000000606 12656251722 0023453 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.franceculture.fr/emission-culture-eco-la-finance-aime-toujours-la-france-2016-01-08',
'body' => array(
'//div[@class="listen"]',
'//div[@class="field-items"]',
),
'strip' => array(
),
)
)
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.futura-sciences.com.php 0000664 0000000 0000000 00000001267 12656251722 0024064 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.futura-sciences.com/magazines/espace/infos/actu/d/astronautique-curiosity-franchi-succes-dune-dingo-gap-52289/#xtor=RSS-8',
'body' => array(
'//div[contains(@class, "content fiche-")]',
),
'strip' => array(
'//h1',
'//*[contains(@class, "content-date")]',
'//*[contains(@class, "diaporama")]',
'//*[contains(@class, "slider")]',
'//*[contains(@class, "cartouche")]',
'//*[contains(@class, "noprint")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.geekculture.com.php 0000664 0000000 0000000 00000000472 12656251722 0023300 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.geekculture.com/joyoftech/joyarchives/2180.html',
'body' => array(
'//p[contains(@class,"Maintext")][2]/img',
),
'strip' => array(),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.howtogeek.com.php 0000664 0000000 0000000 00000000567 12656251722 0022762 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.howtogeek.com/235283/what-is-a-wireless-hard-drive-and-should-i-get-one/',
'body' => array(
'//div[@class="thecontent"]',
),
'strip' => array(
'//*[@class="relatedside"]',
),
),
),
); picoFeed-0.1.18/lib/PicoFeed/Rules/www.lepoint.fr.php 0000664 0000000 0000000 00000001302 12656251722 0022255 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.lepoint.fr/c-est-arrive-aujourd-hui/19-septembre-1783-pour-la-premiere-fois-un-mouton-un-canard-et-un-coq-s-envoient-en-l-air-devant-louis-xvi-18-09-2012-1507704_494.php',
'body' => array(
'//article',
),
'strip' => array(
'//*[contains(@class, "info_article")]',
'//*[contains(@class, "fildariane_titre")]',
'//*[contains(@class, "entete2_article")]',
'//*[contains(@class, "signature_article")]',
'//*[contains(@id, "share")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.lesnumeriques.com.php 0000664 0000000 0000000 00000001672 12656251722 0023665 0 ustar 00root root 0000000 0000000 array(
// Rule entry whose URL pattern `%.*%` matches any path on this site.
'%.*%' => array(
    'test_url' => 'http://www.lesnumeriques.com/blender/kitchenaid-diamond-5ksb1585-p27473/test.html',
    // XPath 1.0 expressions (presumably the nodes kept as article content);
    // one per page type: product, news, article.
    'body' => array(
        '//*[@id="product-content"]',
        '//*[@id="news-content"]',
        '//*[@id="article-content"]',
    ),
    // XPath 1.0 expressions (presumably the nodes removed from the content).
    'strip' => array(
        '//form',
        // Closing `)` and `]` were transposed (`"price-v4"])`), which made
        // the expression unparseable.
        '//div[contains(@class, "price-v4")]',
        '//div[contains(@class, "authors-and-date")]',
        '//div[contains(@class, "mini-product")]',
        '//div[@id="articles-related-authors"]',
        '//div[@id="tags-socials"]',
        '//div[@id="user-reviews"]',
        '//div[@id="product-reviews"]',
        // This expression was listed twice; one entry is sufficient.
        '//div[@id="publication-breadcrumbs-and-date"]',
    ),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.mac4ever.com.php 0000664 0000000 0000000 00000000572 12656251722 0022470 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.mac4ever.com/actu/87392_video-quand-steve-jobs-et-bill-gates-jouaient-au-bachelor-avec-le-mac',
'body' => array(
'//div[contains(@class, "news-news-content")]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.makeuseof.com.php 0000664 0000000 0000000 00000001042 12656251722 0022732 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.makeuseof.com/tag/having-problems-with-audio-in-windows-10-heres-a-likely-fix/',
'body' => array(
'//div[@class="entry"]',
),
'strip' => array(
'//*[@class="new_sharebar"]',
'//*[@class="author"]',
'//*[@class="wdt_grouvi"]',
'//*[@class="wdt_smart_alerts"]',
'//*[@class="modal fade grouvi"]',
),
),
),
); picoFeed-0.1.18/lib/PicoFeed/Rules/www.npr.org.php 0000664 0000000 0000000 00000001267 12656251722 0021574 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.npr.org/blogs/thesalt/2013/09/17/223345977/auto-brewery-syndrome-apparently-you-can-make-beer-in-your-gut',
'body' => array(
'//div[@id="storytext"]',
),
'strip' => array(
'//*[@class="bucket img"]',
'//*[@class="creditwrap"]',
'//div[@class="credit-caption"]',
'//*[@class="credit"]',
'//*[@class="captionwrap"]',
'//*[@class="toggle-caption"]',
'//*[contains(@class, "enlargebtn")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.numerama.com.php 0000664 0000000 0000000 00000000633 12656251722 0022565 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.numerama.com/sciences/125959-recherches-ladn-recompensees-nobel-de-chimie.html',
'body' => array(
'//article',
),
'strip' => array(
'//footer',
'//section[@class="related-article"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.oneindia.com.php 0000664 0000000 0000000 00000000630 12656251722 0022543 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.oneindia.com/india/b-luru-govt-likely-remove-word-eunuch-from-sec-36-a-karnataka-police-act-1981173.html',
'body' => array(
'//div[@class="ecom-ad-content"]',
),
'strip' => array(
'//*[@id="view_cmtns"]',
),
),
),
); picoFeed-0.1.18/lib/PicoFeed/Rules/www.pcinpact.com.php 0000664 0000000 0000000 00000000664 12656251722 0022565 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact',
'body' => array(
'//div[contains(@id, "actu_content")]',
),
'strip' => array(
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.pseudo-sciences.org.php 0000664 0000000 0000000 00000000652 12656251722 0024063 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.pseudo-sciences.org/spip.php?article2275',
'body' => array(
'//div[@id="art_main"]',
),
'strip' => array(
'//div[@id="art_print"]',
'//div[@id="art_chapo"]',
'//img[@class="puce"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.slate.fr.php 0000664 0000000 0000000 00000001371 12656251722 0021721 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.slate.fr/monde/77034/allemagne-2013-couacs-campagne',
'body' => array(
'//div[@class="article_content"]',
),
'strip' => array(
'//*[@id="slate_associated_bn"]',
'//*[@id="ligatus-article"]',
'//*[@id="article_sidebar"]',
'//div[contains(@id, "reseaux")]',
'//*[contains(@class, "smart") or contains(@class, "article_tags") or contains(@class, "article_reactions")]',
'//*[contains(@class, "OUTBRAIN") or contains(@class, "related_item") or contains(@class, "share")]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.universfreebox.com.php 0000664 0000000 0000000 00000000704 12656251722 0024025 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://www.universfreebox.com/article/24305/4G-Bouygues-Telecom-lance-une-vente-flash-sur-son-forfait-Sensation-3Go',
'body' => array(
'//div[@id="corps_corps"]',
),
'strip' => array(
'//*[@id="formulaire"]',
'//*[@id="commentaire"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/www.zeit.de.php 0000664 0000000 0000000 00000003125 12656251722 0021544 0 ustar 00root root 0000000 0000000 array(
'%^/zeit-magazin.*%' => array(
'test_url' => 'http://www.zeit.de/zeit-magazin/2015/15/pegida-kathrin-oertel-lutz-bachmann',
'body' => array(
'//article[@class="article"]',
),
'strip' => array(
'//header/div/h1',
'//header/div/div[@class="article__head__subtitle"]',
'//header/div/div[@class="article__column__author"]',
'//header/div/div[@class="article__column__author"]',
'//header/div/span[@class="article__head__meta-wrap"]',
'//form',
'//style',
'//div[contains(@class, "ad-tile")]',
'//div[@class="iqd-mobile-adplace"]',
'//div[@id="iq-artikelanker"]',
'//div[@id="js-social-services"]',
'//section[@id="js-comments"]',
'//aside',
),
),
'%.*%' => array(
'test_url' => 'http://www.zeit.de/politik/ausland/2015-04/thessaloniki-krise-griechenland-yannis-boutaris/',
'body' => array(
'//div[@class="article-body"]',
),
'strip' => array(
'//*[@class="articleheader"]',
'//*[@class="excerpt"]',
'//div[contains(@class, "ad")]',
'//div[@itemprop="video"]',
'//*[@class="articlemeta"]',
'//*[@class="articlemeta-clear"]',
'//*[@class="zol_inarticletools"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/xkcd.com.php 0000664 0000000 0000000 00000000211 12656251722 0021056 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'%alt="(.+)" */>%' => '/>
$1',
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Rules/zdnet.com.php 0000664 0000000 0000000 00000001620 12656251722 0021256 0 ustar 00root root 0000000 0000000 array(
'%.*%' => array(
'test_url' => 'http://zdnet.com.feedsportal.com/c/35462/f/675637/s/4a33c93e/sc/11/l/0L0Szdnet0N0Carticle0Cchina0Eus0Eagree0Eon0Ecybercrime0Ecooperation0Eamid0Econtinued0Etension0C0Tftag0FRSSbaffb68/story01.htm',
'body' => array(
'//p[@class="summary"]',
'//div[contains(@class,"storyBody")]',
),
'strip' => array(
'//*[contains(@class,"ad-")]',
'//p/span',
'//script',
'//p[@class="summary"]',
'//div[contains(@class,"relatedContent")]',
'//div[contains(@class,"loader")]',
'//p[@class="photoDetails"]',
'//div[@class="thumbnailSlider"]',
'//div[@class="shortcodeGalleryWrapper"]',
),
),
),
);
picoFeed-0.1.18/lib/PicoFeed/Scraper/ 0000775 0000000 0000000 00000000000 12656251722 0017152 5 ustar 00root root 0000000 0000000 picoFeed-0.1.18/lib/PicoFeed/Scraper/CandidateParser.php 0000664 0000000 0000000 00000014335 12656251722 0022722 0 ustar 00root root 0000000 0000000 dom = XmlParser::getHtmlDocument(''.$html);
$this->xpath = new DOMXPath($this->dom);
}
/**
 * Extract the most relevant markup of the document.
 *
 * The candidate attributes are tried first. When that yields fewer than
 * 200 bytes the first article element is used instead, and when the result
 * is still shorter than 50 bytes the body element is used. Whatever was
 * selected is passed through stripGarbage() before being returned.
 *
 * @return string
 */
public function execute()
{
    $markup = $this->findContentWithCandidates();

    if (strlen($markup) < 200) {
        $markup = $this->findContentWithArticle();
    }

    if (strlen($markup) < 50) {
        $markup = $this->findContentWithBody();
    }

    $cleaned = $this->stripGarbage($markup);

    return $cleaned;
}
/**
 * Look for the first element matching one of the candidate attributes.
 *
 * For each candidate, an element qualifies when its class contains the
 * candidate or its id equals it, unless its class also contains "nav" or
 * "page". The first match of the first successful candidate is serialized.
 *
 * @return string Serialized node, or an empty string when nothing matches
 */
public function findContentWithCandidates()
{
    $caller = get_called_class();

    foreach ($this->candidatesAttributes as $candidate) {
        Logger::setMessage($caller.': Try this candidate: "'.$candidate.'"');

        $query = '//*[(contains(@class, "'.$candidate.'") or @id="'.$candidate.'") and not (contains(@class, "nav") or contains(@class, "page"))]';
        $matches = $this->xpath->query($query);

        if ($matches === false || $matches->length < 1) {
            continue;
        }

        Logger::setMessage($caller.': Find candidate "'.$candidate.'"');

        return $this->dom->saveXML($matches->item(0));
    }

    return '';
}
/**
 * Serialize the first article element of the document.
 *
 * @return string Serialized node, or an empty string when there is none
 */
public function findContentWithArticle()
{
    $articles = $this->xpath->query('//article');

    if ($articles === false || $articles->length < 1) {
        return '';
    }

    Logger::setMessage(get_called_class().': Find tag');

    return $this->dom->saveXML($articles->item(0));
}
/**
 * Serialize the first body element of the document.
 *
 * @return string Serialized node, or an empty string when there is none
 */
public function findContentWithBody()
{
    $bodies = $this->xpath->query('//body');

    if ($bodies === false || $bodies->length < 1) {
        return '';
    }

    Logger::setMessage(get_called_class().' Find ');

    return $this->dom->saveXML($bodies->item(0));
}
/**
 * Remove blacklisted tags and attributes from a markup fragment.
 *
 * The fragment is loaded with XmlParser::getDomDocument(); when that
 * returns false the input is handed back untouched.
 *
 * @param string $content Markup to clean
 *
 * @return string
 */
public function stripGarbage($content)
{
    $dom = XmlParser::getDomDocument($content);

    if ($dom === false) {
        return $content;
    }

    $xpath = new DOMXPath($dom);

    $this->stripTags($xpath);
    $this->stripAttributes($dom, $xpath);

    return $dom->saveXML($dom->documentElement);
}
/**
 * Delete every element whose tag name is listed in $this->stripTags.
 *
 * @param DOMXPath $xpath XPath handle bound to the document being cleaned
 */
public function stripTags(DOMXPath $xpath)
{
    foreach ($this->stripTags as $tag) {
        $matches = $xpath->query('//'.$tag);

        if ($matches === false || $matches->length < 1) {
            continue;
        }

        Logger::setMessage(get_called_class().': Strip tag: "'.$tag.'"');

        foreach ($matches as $element) {
            $element->parentNode->removeChild($element);
        }
    }
}
/**
 * Delete elements whose class or id contains a blacklisted keyword.
 *
 * Keywords come from $this->stripAttributes. A matching element is only
 * removed when shouldRemove() agrees.
 *
 * @param DomDocument $dom   Document being cleaned
 * @param DOMXPath    $xpath XPath handle bound to that document
 */
public function stripAttributes(DomDocument $dom, DOMXPath $xpath)
{
    foreach ($this->stripAttributes as $attribute) {
        $query = '//*[contains(@class, "'.$attribute.'") or contains(@id, "'.$attribute.'")]';
        $matches = $xpath->query($query);

        if ($matches === false || $matches->length < 1) {
            continue;
        }

        Logger::setMessage(get_called_class().': Strip attribute: "'.$attribute.'"');

        foreach ($matches as $element) {
            if (!$this->shouldRemove($dom, $element)) {
                continue;
            }

            $element->parentNode->removeChild($element);
        }
    }
}
/**
 * Decide whether a node may be removed from the document.
 *
 * A node whose text accounts for 90% or more of the document text
 * (measured in bytes) is kept; an empty document allows any removal.
 *
 * @param DomDocument $dom  Document owning the node
 * @param DomNode     $node Node considered for removal
 *
 * @return bool False when the node must be kept
 */
public function shouldRemove(DomDocument $dom, $node)
{
    $total_bytes = strlen($dom->textContent);
    $node_bytes = strlen($node->textContent);

    if ($total_bytes === 0) {
        return true;
    }

    $ratio = $node_bytes * 100 / $total_bytes;

    if ($ratio < 90) {
        return true;
    }

    Logger::setMessage(get_called_class().': Should not remove this node ('.$node->nodeName.') ratio: '.$ratio.'%');

    return false;
}
}
picoFeed-0.1.18/lib/PicoFeed/Scraper/ParserInterface.php 0000664 0000000 0000000 00000000301 12656251722 0022732 0 ustar 00root root 0000000 0000000 config = $config;
}
/**
 * Get the rules for an URL.
 *
 * The hostname of the URL is turned into a list of candidate file names,
 * and each rules folder is searched in order. The first non-empty rule
 * set wins.
 *
 * @param string $url the URL that should be looked up
 *
 * @return array the array containing the rules (empty when none apply)
 */
public function getRules($url)
{
    $hostname = parse_url($url, PHP_URL_HOST);

    // parse_url() gives false for a malformed URL and null when the URL
    // has no host component: neither can be mapped to a rule file.
    if (!is_string($hostname) || $hostname === '') {
        return array();
    }

    $files = $this->getRulesFileList($hostname);

    foreach ($this->getRulesFolders() as $folder) {
        $rule = $this->loadRuleFile($folder, $files);

        if (!empty($rule)) {
            return $rule;
        }
    }

    return array();
}
/**
 * Build the candidate rule file names for a hostname.
 *
 * The full hostname always comes first. With three or more labels the
 * leftmost one is treated as the subdomain; with exactly two labels the
 * first one is the bare domain name.
 *
 * @param string $hostname Hostname
 *
 * @return array File names without extension, in lookup order
 */
public function getRulesFileList($hostname)
{
    $candidates = array($hostname);             // subdomain.domain.tld
    $labels = explode('.', $hostname);
    $count = count($labels);

    if ($count === 2) {
        $candidates[] = '.'.$hostname;          // .domain.tld
        $candidates[] = $labels[0];             // domain
    } elseif ($count > 2) {
        $domain = implode('.', array_slice($labels, 1));

        $candidates[] = $domain;                // domain.tld
        $candidates[] = '.'.$domain;            // .domain.tld
        $candidates[] = $labels[0];             // subdomain
    }

    return $candidates;
}
/**
 * Load a rule file from the defined folder.
 *
 * The first candidate that exists as a regular file is included and its
 * return value handed back.
 *
 * @param string $folder Rule directory
 * @param array  $files  List of possible file names (without extension)
 *
 * @return array Included rule set, or an empty array when no file matches
 */
public function loadRuleFile($folder, array $files)
{
    foreach ($files as $file) {
        // The names are derived from a URL hostname and end up in an
        // include path: refuse anything that could leave $folder.
        if (!is_string($file) || $file === '' || strpbrk($file, "/\\\0") !== false) {
            continue;
        }

        $filename = $folder.'/'.$file.'.php';

        // is_file() rather than file_exists(): a directory must not be included
        if (is_file($filename)) {
            Logger::setMessage(get_called_class().' Load rule: '.$file);

            return include $filename;
        }
    }

    return array();
}
/**
 * List the folders searched for rule files.
 *
 * The bundled Rules folder always comes first; the folder returned by the
 * config's getGrabberRulesFolder() is appended when it is not null.
 *
 * @return array
 */
public function getRulesFolders()
{
    $folders = array(__DIR__.'/../Rules');

    if ($this->config === null) {
        return $folders;
    }

    $custom_folder = $this->config->getGrabberRulesFolder();

    if ($custom_folder !== null) {
        $folders[] = $custom_folder;
    }

    return $folders;
}
}
picoFeed-0.1.18/lib/PicoFeed/Scraper/RuleParser.php 0000664 0000000 0000000 00000003561 12656251722 0021754 0 ustar 00root root 0000000 0000000 rules = $rules;
$this->dom = XmlParser::getHtmlDocument(''.$html);
$this->xpath = new DOMXPath($this->dom);
}
/**
 * Apply the predefined rules: strip first, then collect the content.
 *
 * @return string
 */
public function execute()
{
    // Stripping must happen before extraction so removed nodes never
    // show up in the serialized content.
    $this->stripTags();

    $content = $this->findContent();

    return $content;
}
/**
 * Remove every node matched by the XPath patterns of the "strip" rule.
 *
 * Nothing happens when the rule is missing or is not an array.
 */
public function stripTags()
{
    if (!isset($this->rules['strip']) || !is_array($this->rules['strip'])) {
        return;
    }

    foreach ($this->rules['strip'] as $pattern) {
        $matches = $this->xpath->query($pattern);

        if ($matches === false || $matches->length < 1) {
            continue;
        }

        foreach ($matches as $element) {
            $element->parentNode->removeChild($element);
        }
    }
}
/**
 * Concatenate the serialization of every node matched by the "body" rule.
 *
 * Patterns are evaluated in rule order and their matches appended in
 * document order.
 *
 * @return string Empty when the rule is missing or nothing matches
 */
public function findContent()
{
    if (!isset($this->rules['body']) || !is_array($this->rules['body'])) {
        return '';
    }

    $fragments = '';

    foreach ($this->rules['body'] as $pattern) {
        $matches = $this->xpath->query($pattern);

        if ($matches === false || $matches->length < 1) {
            continue;
        }

        foreach ($matches as $element) {
            $fragments .= $this->dom->saveXML($element);
        }
    }

    return $fragments;
}
}
picoFeed-0.1.18/lib/PicoFeed/Scraper/Scraper.php 0000664 0000000 0000000 00000015771 12656251722 0021275 0 ustar 00root root 0000000 0000000 config = $config;
Logger::setTimezone($this->config->getTimezone());
}
/**
 * Turn off the candidate parser (see getParser()).
 *
 * @return Scraper The current instance, for chaining
 */
public function disableCandidateParser()
{
    $enabled = false;
    $this->enableCandidateParser = $enabled;

    return $this;
}
/**
 * Encoding currently stored on the scraper.
 *
 * It is set by setEncoding() or by download(), which copies the value
 * reported by the HTTP client.
 *
 * @return string
 */
public function getEncoding()
{
    $encoding = $this->encoding;

    return $encoding;
}
/**
 * Store the encoding to use for the raw content.
 *
 * @param string $encoding Encoding name
 *
 * @return Scraper The current instance, for chaining
 */
public function setEncoding($encoding)
{
    $scraper = $this;
    $scraper->encoding = $encoding;

    return $scraper;
}
/**
 * URL currently stored on the scraper.
 *
 * download() overwrites it with the URL reported by the HTTP client.
 *
 * @return string
 */
public function getUrl()
{
    $url = $this->url;

    return $url;
}
/**
 * Store the URL to download.
 *
 * @param string $url URL
 *
 * @return Scraper The current instance, for chaining
 */
public function setUrl($url)
{
    $scraper = $this;
    $scraper->url = $url;

    return $scraper;
}
/**
 * Tell whether the scraper holds non-empty relevant content.
 *
 * Emptiness follows PHP's empty(), so the string "0" counts as empty.
 *
 * @return bool
 */
public function hasRelevantContent()
{
    if (empty($this->content)) {
        return false;
    }

    return true;
}
/**
 * Relevant content as produced by the last execute(), unfiltered.
 *
 * @return string
 */
public function getRelevantContent()
{
    $content = $this->content;

    return $content;
}
/**
 * Raw HTML held by the scraper (downloaded or set with setRawContent()).
 *
 * @return string
 */
public function getRawContent()
{
    $html = $this->html;

    return $html;
}
/**
 * Provide the raw HTML directly instead of downloading it.
 *
 * @param string $html Unfiltered HTML
 *
 * @return Scraper The current instance, for chaining
 */
public function setRawContent($html)
{
    $scraper = $this;
    $scraper->html = $html;

    return $scraper;
}
/**
 * Run the relevant content through the HTML filter.
 *
 * The filter is created by Filter::html() with the current URL and is
 * given the scraper's config before running.
 *
 * @return string
 */
public function getFilteredContent()
{
    $html_filter = Filter::html($this->content, $this->url);
    $html_filter->setConfig($this->config);

    $filtered = $html_filter->execute();

    return $filtered;
}
/**
 * Fetch the current URL with the HTTP client.
 *
 * The previous html, content and encoding are reset before the request.
 * On success the URL, HTML and encoding are replaced by the values the
 * client reports. A ClientException is logged, not propagated.
 *
 * @return bool True on success, false when the URL is empty or the
 *              client threw a ClientException
 */
public function download()
{
    if (empty($this->url)) {
        return false;
    }

    // Clear everything
    $this->html = '';
    $this->content = '';
    $this->encoding = '';

    try {
        $http = Client::getInstance();
        $http->setConfig($this->config);
        $http->setTimeout($this->config->getGrabberTimeout());
        $http->setUserAgent($this->config->getGrabberUserAgent());
        $http->execute($this->url);

        $this->url = $http->getUrl();
        $this->html = $http->getContent();
        $this->encoding = $http->getEncoding();
    } catch (ClientException $e) {
        Logger::setMessage(get_called_class().': '.$e->getMessage());

        return false;
    }

    return true;
}
/**
 * Download the page and extract its relevant content.
 *
 * The result of download() is not checked here; skipProcessing() covers
 * the empty-HTML case. When no parser is available the content is left
 * as it is.
 */
public function execute()
{
    $this->download();

    if ($this->skipProcessing()) {
        return;
    }

    $this->prepareHtml();

    $parser = $this->getParser();

    if ($parser === null) {
        return;
    }

    $this->content = $parser->execute();

    Logger::setMessage(get_called_class().': Content length: '.strlen($this->content).' bytes');
}
/**
 * Tell whether the parsing step must be skipped.
 *
 * The detectors run first, in order; the first one that returns true
 * ends the check. Otherwise parsing is skipped only when the raw HTML
 * is empty.
 *
 * @return bool
 */
public function skipProcessing()
{
    foreach (array('detectStreamingVideos', 'detectPdfFiles') as $detector) {
        if ($this->$detector()) {
            return true;
        }
    }

    if (!empty($this->html)) {
        return false;
    }

    Logger::setMessage(get_called_class().': Raw HTML is empty');

    return true;
}
/**
 * Get the parser.
 *
 * When grabber rules exist for the URL, the first rule whose pattern
 * matches the URL path yields a RuleParser. If rules exist but none
 * matches, no parser is returned. Without rules the CandidateParser is
 * used, provided it has not been disabled.
 *
 * @return ParserInterface|null
 */
public function getParser()
{
    $ruleLoader = new RuleLoader($this->config);
    $rules = $ruleLoader->getRules($this->url);

    if (!empty($rules['grabber'])) {
        Logger::setMessage(get_called_class().': Parse content with rules');

        // The path is the same for every pattern: compute it once
        // instead of rebuilding the Url object on each iteration.
        $url = new Url($this->url);
        $sub_url = $url->getFullPath();

        foreach ($rules['grabber'] as $pattern => $rule) {
            if (preg_match($pattern, $sub_url)) {
                Logger::setMessage(get_called_class().': Matched url '.$sub_url);

                return new RuleParser($this->html, $rule);
            }
        }
    } elseif ($this->enableCandidateParser) {
        Logger::setMessage(get_called_class().': Parse content with candidates');

        return new CandidateParser($this->html);
    }

    return null;
}
/**
 * Normalize the encoding of the raw HTML and strip its head tags.
 *
 * The encoding found in the HTML meta tag takes precedence; the stored
 * encoding is used when the meta tag gives nothing.
 */
public function prepareHtml()
{
    $html_encoding = XmlParser::getEncodingFromMetaTag($this->html);
    $source_encoding = $html_encoding ?: $this->encoding;

    $this->html = Encoding::convert($this->html, $source_encoding);
    $this->html = Filter::stripHeadTags($this->html);

    Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'" ; HTML Encoding "'.$html_encoding.'"');
}
/**
 * Return the Youtube embed player and skip processing.
 *
 * When the URL carries an 11-character Youtube video id (after "v=",
 * "v/", "vi=", "vi/" or "youtu.be/"), the content becomes an iframe
 * pointing at the embed player for that video.
 *
 * @return bool True when a video id was found
 */
public function detectStreamingVideos()
{
    // The dot of "youtu.be" is escaped so it only matches a literal dot
    if (preg_match('#(?<=v=|v/|vi=|vi/|youtu\.be/)[a-zA-Z0-9_-]{11}#', $this->url, $matches)) {
        // The id is limited to [a-zA-Z0-9_-] by the pattern, so it can be
        // placed in the attribute without escaping.
        $this->content = '<iframe width="560" height="315" src="https://www.youtube.com/embed/'.$matches[0].'" frameborder="0"></iframe>';

        return true;
    }

    return false;
}
/**
 * Skip processing for PDF documents.
 *
 * A URL is treated as a PDF when it ends with "pdf" or when its path
 * component has a "pdf" extension. Both checks ignore case, so ".PDF"
 * and "file.pdf?download=1" are detected as well.
 *
 * @return bool
 */
public function detectPdfFiles()
{
    // Historical check, now case-insensitive
    if (strtolower(substr($this->url, -3)) === 'pdf') {
        return true;
    }

    // Also look at the path alone so a query string or fragment after
    // the file name does not hide the extension.
    $path = parse_url($this->url, PHP_URL_PATH);

    return is_string($path) && strtolower(pathinfo($path, PATHINFO_EXTENSION)) === 'pdf';
}
}
picoFeed-0.1.18/lib/PicoFeed/Serialization/ 0000775 0000000 0000000 00000000000 12656251722 0020370 5 ustar 00root root 0000000 0000000 picoFeed-0.1.18/lib/PicoFeed/Serialization/Export.php 0000664 0000000 0000000 00000005776 12656251722 0022401 0 ustar 00root root 0000000 0000000 content = $content;
}
/**
 * Get the OPML document.
 *
 * Entries of $this->content with a string key are exported as a category
 * containing a list of feeds; entries with an integer key are exported
 * as a single top-level feed.
 *
 * @return string
 */
public function execute()
{
    // SimpleXMLElement needs a well-formed root element: an empty string
    // makes the constructor throw.
    $xml = new SimpleXMLElement('<opml version="1.0"/>');

    $head = $xml->addChild('head');
    $head->addChild('title', 'OPML Export');

    $body = $xml->addChild('body');

    foreach ($this->content as $category => $values) {
        if (is_string($category)) {
            $this->createCategory($body, $category, $values);
        } else {
            $this->createEntry($body, $values);
        }
    }

    return $xml->asXML();
}
/**
 * Append an outline element describing one feed.
 *
 * Nothing is added when one of the fields listed in
 * $this->required_fields is missing. The description falls back to the
 * title when absent.
 *
 * @param SimpleXMLElement $parent Parent Element
 * @param array            $feed   Feed properties
 */
public function createEntry(SimpleXMLElement $parent, array $feed)
{
    foreach ($this->required_fields as $field) {
        if (!isset($feed[$field])) {
            return;
        }
    }

    $description = isset($feed['description']) ? $feed['description'] : $feed['title'];

    // Attribute order is kept as it appears in the generated document
    $attributes = array(
        'xmlUrl' => $feed['feed_url'],
        'htmlUrl' => $feed['site_url'],
        'title' => $feed['title'],
        'text' => $feed['title'],
        'description' => $description,
        'type' => 'rss',
        'version' => 'RSS',
    );

    $outline = $parent->addChild('outline');

    foreach ($attributes as $name => $value) {
        $outline->addAttribute($name, $value);
    }
}
/**
 * Append one outline element per feed of the list.
 *
 * @param SimpleXMLElement $parent Parent Element
 * @param array            $feeds  Feed list
 */
public function createEntries(SimpleXMLElement $parent, array $feeds)
{
    foreach ($feeds as $properties) {
        $this->createEntry($parent, $properties);
    }
}
/**
 * Append a category outline and nest its feeds inside it.
 *
 * @param SimpleXMLElement $parent   Parent Element
 * @param string           $category Category name, stored in the "text" attribute
 * @param array            $feeds    Feeds belonging to the category
 */
public function createCategory(SimpleXMLElement $parent, $category, array $feeds)
{
    $category_outline = $parent->addChild('outline');
    $category_outline->addAttribute('text', $category);

    $this->createEntries($category_outline, $feeds);
}
}
picoFeed-0.1.18/lib/PicoFeed/Serialization/Import.php 0000664 0000000 0000000 00000007667 12656251722 0022373 0 ustar 00root root 0000000 0000000 content = $content;
}
/**
 * Parse the OPML document held in $this->content.
 *
 * @return array|false Subscriptions found, or false when the XML cannot
 *                     be parsed, the root is not "opml" or it has no body
 */
public function execute()
{
    $caller = get_called_class();

    Logger::setMessage($caller.': start importation');

    $xml = XmlParser::getSimpleXml(trim($this->content));
    $is_opml = $xml !== false && $xml->getName() === 'opml' && isset($xml->body);

    if (!$is_opml) {
        Logger::setMessage($caller.': OPML tag not found or malformed XML document');

        return false;
    }

    $this->parseEntries($xml->body);

    Logger::setMessage($caller.': '.count($this->items).' subscriptions found');

    return $this->items;
}
/**
 * Walk the outline elements of a node and collect the subscriptions.
 *
 * An outline that contains other outlines is descended into. A leaf
 * outline becomes a subscription when it has an xmlUrl attribute and a
 * text or title attribute; its category is taken from the node being
 * walked.
 *
 * @param SimpleXMLElement $tree XML node
 */
public function parseEntries(SimpleXMLElement $tree)
{
    if (!isset($tree->outline)) {
        return;
    }

    foreach ($tree->outline as $item) {
        if (isset($item->outline)) {
            $this->parseEntries($item);
            continue;
        }

        $has_label = isset($item['text']) || isset($item['title']);

        if (!$has_label || !isset($item['xmlUrl'])) {
            continue;
        }

        // The lookups are kept in this order: site_url and description
        // fall back on fields already stored on the entry.
        $entry = new StdClass();
        $entry->category = $this->findCategory($tree);
        $entry->title = $this->findTitle($item);
        $entry->feed_url = $this->findFeedUrl($item);
        $entry->site_url = $this->findSiteUrl($item, $entry);
        $entry->type = $this->findType($item);
        $entry->description = $this->findDescription($item, $entry);

        $this->items[] = $entry;
    }
}
/**
 * Category name of a node: its title attribute, else its text attribute.
 *
 * @param SimpleXmlElement $tree XML tree
 *
 * @return string Empty when the node has neither attribute
 */
public function findCategory(SimpleXmlElement $tree)
{
    if (isset($tree['title'])) {
        return (string) $tree['title'];
    }

    return (string) $tree['text'];
}
/**
 * Title of a feed outline: its title attribute, else its text attribute.
 *
 * @param SimpleXmlElement $item XML tree
 *
 * @return string
 */
public function findTitle(SimpleXmlElement $item)
{
    if (isset($item['title'])) {
        return (string) $item['title'];
    }

    return (string) $item['text'];
}
/**
 * Feed URL of an outline, read from its xmlUrl attribute.
 *
 * @param SimpleXmlElement $item XML tree
 *
 * @return string
 */
public function findFeedUrl(SimpleXmlElement $item)
{
    $feed_url = (string) $item['xmlUrl'];

    return $feed_url;
}
/**
 * Site URL of an outline: its htmlUrl attribute, else the feed URL
 * already stored on the entry.
 *
 * @param SimpleXmlElement $item  XML tree
 * @param StdClass         $entry Feed entry (feed_url must be set)
 *
 * @return string
 */
public function findSiteUrl(SimpleXmlElement $item, StdClass $entry)
{
    if (isset($item['htmlUrl'])) {
        return (string) $item['htmlUrl'];
    }

    return $entry->feed_url;
}
/**
 * Find type.
 *
 * The version attribute wins, then the type attribute, then "rss".
 *
 * @param SimpleXmlElement $item XML tree
 *
 * @return string
 */
public function findType(SimpleXmlElement $item)
{
    // Written as guard clauses on purpose: an unparenthesised nested
    // ternary is left-associative before PHP 8 (it returned the type
    // attribute even when version was set) and a fatal error since PHP 8.
    if (isset($item['version'])) {
        return (string) $item['version'];
    }

    if (isset($item['type'])) {
        return (string) $item['type'];
    }

    return 'rss';
}
/**
 * Description of an outline: its description attribute, else the title
 * already stored on the entry.
 *
 * @param SimpleXmlElement $item  XML tree
 * @param StdClass         $entry Feed entry (title must be set)
 *
 * @return string
 */
public function findDescription(SimpleXmlElement $item, StdClass $entry)
{
    if (isset($item['description'])) {
        return (string) $item['description'];
    }

    return $entry->title;
}
}
picoFeed-0.1.18/lib/PicoFeed/Syndication/ 0000775 0000000 0000000 00000000000 12656251722 0020037 5 ustar 00root root 0000000 0000000 picoFeed-0.1.18/lib/PicoFeed/Syndication/Atom.php 0000664 0000000 0000000 00000014005 12656251722 0021450 0 ustar 00root root 0000000 0000000 checkRequiredProperties($this->required_feed_properties, $this);
$this->dom = new DomDocument('1.0', 'UTF-8');
$this->dom->formatOutput = true;
//
$feed = $this->dom->createElement('feed');
$feed->setAttributeNodeNS(new DomAttr('xmlns', 'http://www.w3.org/2005/Atom'));
//
$generator = $this->dom->createElement('generator', 'PicoFeed');
$generator->setAttribute('uri', 'https://github.com/fguillot/picoFeed');
$feed->appendChild($generator);
//
$title = $this->dom->createElement('title');
$title->appendChild($this->dom->createTextNode($this->title));
$feed->appendChild($title);
//
$id = $this->dom->createElement('id');
$id->appendChild($this->dom->createTextNode($this->site_url));
$feed->appendChild($id);
//
$this->addUpdated($feed, $this->updated);
//
$this->addLink($feed, $this->site_url);
//
$this->addLink($feed, $this->feed_url, 'self', 'application/atom+xml');
//
if (isset($this->author)) {
$this->addAuthor($feed, $this->author);
}
//
foreach ($this->items as $item) {
$this->checkRequiredProperties($this->required_item_properties, $item);
$feed->appendChild($this->createEntry($item));
}
$this->dom->appendChild($feed);
if ($filename) {
$this->dom->save($filename);
} else {
return $this->dom->saveXML();
}
}
/**
 * Build the Atom entry element for one item.
 *
 * Children are appended in this order: title, id, updated, published
 * (optional), link, summary (optional), content (optional) and author
 * (optional).
 *
 * @param array $item Item properties
 *
 * @return DomElement
 */
public function createEntry(array $item)
{
    $dom = $this->dom;
    $entry = $dom->createElement('entry');

    // title
    $title_node = $dom->createElement('title');
    $title_node->appendChild($dom->createTextNode($item['title']));
    $entry->appendChild($title_node);

    // id: falls back to the item URL
    $identifier = isset($item['id']) ? $item['id'] : $item['url'];
    $id_node = $dom->createElement('id');
    $id_node->appendChild($dom->createTextNode($identifier));
    $entry->appendChild($id_node);

    // updated: an empty value makes addUpdated() use the current time
    $updated = isset($item['updated']) ? $item['updated'] : '';
    $this->addUpdated($entry, $updated);

    // published
    if (isset($item['published'])) {
        $published_node = $dom->createElement('published', date(DATE_ATOM, $item['published']));
        $entry->appendChild($published_node);
    }

    // link
    $this->addLink($entry, $item['url']);

    // summary
    if (isset($item['summary'])) {
        $summary_node = $dom->createElement('summary');
        $summary_node->appendChild($dom->createTextNode($item['summary']));
        $entry->appendChild($summary_node);
    }

    // content: HTML wrapped in a CDATA section
    if (isset($item['content'])) {
        $content_node = $dom->createElement('content');
        $content_node->setAttribute('type', 'html');
        $content_node->appendChild($dom->createCDATASection($item['content']));
        $entry->appendChild($content_node);
    }

    // author
    if (isset($item['author'])) {
        $this->addAuthor($entry, $item['author']);
    }

    return $entry;
}
/**
 * Append a link element to a node.
 *
 * @param DomElement $xml  XML node
 * @param string     $url  Value of the href attribute
 * @param string     $rel  Link rel attribute
 * @param string     $type Link type attribute
 */
public function addLink(DomElement $xml, $url, $rel = 'alternate', $type = 'text/html')
{
    $link = $this->dom->createElement('link');

    // Attributes are set in rel, type, href order
    $attributes = array('rel' => $rel, 'type' => $type, 'href' => $url);

    foreach ($attributes as $name => $value) {
        $link->setAttribute($name, $value);
    }

    $xml->appendChild($link);
}
/**
 * Append an updated element formatted with DATE_ATOM.
 *
 * @param DomElement $xml   XML node
 * @param int        $value Timestamp; any falsy value means "now"
 */
public function addUpdated(DomElement $xml, $value = 0)
{
    $timestamp = $value ?: time();
    $updated = $this->dom->createElement('updated', date(DATE_ATOM, $timestamp));

    $xml->appendChild($updated);
}
/**
 * Append an author element to a node.
 *
 * The "name", "email" and "url" keys of $values become the name, email
 * and uri children, in that order. The author element is appended even
 * when none of the keys is present.
 *
 * @param DomElement $xml    XML node
 * @param array      $values Author name, email and url
 */
public function addAuthor(DomElement $xml, array $values)
{
    $author = $this->dom->createElement('author');

    // Input key => child tag name
    $children = array('name' => 'name', 'email' => 'email', 'url' => 'uri');

    foreach ($children as $key => $tag) {
        if (!isset($values[$key])) {
            continue;
        }

        $child = $this->dom->createElement($tag);
        $child->appendChild($this->dom->createTextNode($values[$key]));
        $author->appendChild($child);
    }

    $xml->appendChild($author);
}
}
picoFeed-0.1.18/lib/PicoFeed/Syndication/Rss20.php 0000664 0000000 0000000 00000013514 12656251722 0021465 0 ustar 00root root 0000000 0000000 checkRequiredProperties($this->required_feed_properties, $this);
$this->dom = new DomDocument('1.0', 'UTF-8');
$this->dom->formatOutput = true;
//
$rss = $this->dom->createElement('rss');
$rss->setAttribute('version', '2.0');
$rss->setAttributeNodeNS(new DomAttr('xmlns:content', 'http://purl.org/rss/1.0/modules/content/'));
$rss->setAttributeNodeNS(new DomAttr('xmlns:atom', 'http://www.w3.org/2005/Atom'));
$channel = $this->dom->createElement('channel');
//
$generator = $this->dom->createElement('generator', 'PicoFeed (https://github.com/fguillot/picoFeed)');
$channel->appendChild($generator);
//
$title = $this->dom->createElement('title');
$title->appendChild($this->dom->createTextNode($this->title));
$channel->appendChild($title);
//
$description = $this->dom->createElement('description');
$description->appendChild($this->dom->createTextNode($this->description ?: $this->title));
$channel->appendChild($description);
//
$this->addPubDate($channel, $this->updated);
//
$link = $this->dom->createElement('atom:link');
$link->setAttribute('href', $this->feed_url);
$link->setAttribute('rel', 'self');
$link->setAttribute('type', 'application/rss+xml');
$channel->appendChild($link);
//
$link = $this->dom->createElement('link');
$link->appendChild($this->dom->createTextNode($this->site_url));
$channel->appendChild($link);
//
if (isset($this->author)) {
$this->addAuthor($channel, 'webMaster', $this->author);
}
//
foreach ($this->items as $item) {
$this->checkRequiredProperties($this->required_item_properties, $item);
$channel->appendChild($this->createEntry($item));
}
$rss->appendChild($channel);
$this->dom->appendChild($rss);
if ($filename) {
$this->dom->save($filename);
} else {
return $this->dom->saveXML();
}
}
/**
 * Build the RSS item element for one item.
 *
 * Children are appended in this order: title, link, guid, pubDate,
 * description (optional), content:encoded (optional) and author
 * (optional).
 *
 * @param array $item Item properties
 *
 * @return DomElement
 */
public function createEntry(array $item)
{
    $dom = $this->dom;
    $entry = $dom->createElement('item');

    // title
    $title_node = $dom->createElement('title');
    $title_node->appendChild($dom->createTextNode($item['title']));
    $entry->appendChild($title_node);

    // link
    $link_node = $dom->createElement('link');
    $link_node->appendChild($dom->createTextNode($item['url']));
    $entry->appendChild($link_node);

    // guid: an explicit id is not a permalink, the item URL is
    $has_id = isset($item['id']);
    $guid_node = $dom->createElement('guid');
    $guid_node->setAttribute('isPermaLink', $has_id ? 'false' : 'true');
    $guid_node->appendChild($dom->createTextNode($has_id ? $item['id'] : $item['url']));
    $entry->appendChild($guid_node);

    // pubDate: an empty value makes addPubDate() use the current time
    $updated = isset($item['updated']) ? $item['updated'] : '';
    $this->addPubDate($entry, $updated);

    // description
    if (isset($item['summary'])) {
        $description_node = $dom->createElement('description');
        $description_node->appendChild($dom->createTextNode($item['summary']));
        $entry->appendChild($description_node);
    }

    // content:encoded, wrapped in a CDATA section
    if (isset($item['content'])) {
        $content_node = $dom->createElement('content:encoded');
        $content_node->appendChild($dom->createCDATASection($item['content']));
        $entry->appendChild($content_node);
    }

    // author
    if (isset($item['author'])) {
        $this->addAuthor($entry, 'author', $item['author']);
    }

    return $entry;
}
/**
 * Append a pubDate element formatted with DATE_RSS.
 *
 * @param DomElement $xml   XML node
 * @param int        $value Timestamp; any falsy value means "now"
 */
public function addPubDate(DomElement $xml, $value = 0)
{
    $timestamp = $value ?: time();
    $pub_date = $this->dom->createElement('pubDate', date(DATE_RSS, $timestamp));

    $xml->appendChild($pub_date);
}
/**
 * Append an author-like element formatted as "email (name)".
 *
 * Nothing is appended when there is no usable email; the name alone is
 * never written.
 *
 * @param DomElement $xml    XML node
 * @param string     $tag    Tag name
 * @param array      $values Author name and email
 */
public function addAuthor(DomElement $xml, $tag, array $values)
{
    $label = isset($values['email']) ? ''.$values['email'] : '';

    if (!$label) {
        return;
    }

    if (isset($values['name'])) {
        $label .= ' ('.$values['name'].')';
    }

    $element = $this->dom->createElement($tag);
    $element->appendChild($this->dom->createTextNode($label));

    $xml->appendChild($element);
}
}
picoFeed-0.1.18/lib/PicoFeed/Syndication/Writer.php 0000664 0000000 0000000 00000003175 12656251722 0022032 0 ustar 00root root 0000000 0000000 $property)) || (is_array($container) && !isset($container[$property]))) {
throw new RuntimeException('Required property missing: '.$property);
}
}
}
}
picoFeed-0.1.18/picofeed 0000775 0000000 0000000 00000005246 12656251722 0015042 0 ustar 00root root 0000000 0000000 #!/usr/bin/env php
discover($url);
$parser = $reader->getParser(
$resource->getUrl(),
$resource->getContent(),
$resource->getEncoding()
);
if ($disable_filtering) {
$parser->disableContentFiltering();
}
return $parser->execute();
}
catch (PicoFeedException $e) {
echo 'Exception thrown ===> "'.$e->getMessage().'"'.PHP_EOL;
return false;
}
}
/**
 * Print the first item of a feed whose id is strictly equal to $item_id.
 *
 * The item itself is echoed first, then its content between two
 * delimiter lines. Nothing is printed when no item matches.
 *
 * @param object $feed    Parsed feed exposing an "items" list
 * @param string $item_id Identifier compared with each item's getId()
 */
function get_item($feed, $item_id)
{
    foreach ($feed->items as $candidate) {
        if ($candidate->getId() !== $item_id) {
            continue;
        }

        echo $candidate;
        echo "============= CONTENT ================\n";
        echo $candidate->getContent();
        echo "\n============= CONTENT ================\n";

        return;
    }
}
/**
 * Fetch a feed and echo whatever get_feed() returned.
 *
 * @param string $url Feed URL
 */
function dump_feed($url)
{
    $result = get_feed($url);

    echo $result;
}
/**
 * Fetch a feed, discard the result and print the logger messages.
 *
 * @param string $url Feed URL
 */
function debug_feed($url)
{
    get_feed($url);

    $messages = Logger::getMessages();
    print_r($messages);
}
/**
 * Fetch a feed and print one of its items.
 *
 * Nothing is printed when get_feed() returns false.
 *
 * @param string $url     Feed URL
 * @param string $item_id Item identifier
 */
function dump_item($url, $item_id)
{
    $feed = get_feed($url);

    if ($feed === false) {
        return;
    }

    get_item($feed, $item_id);
}
/**
 * Same as dump_item(), but asks get_feed() to disable content filtering.
 *
 * @param string $url     Feed URL
 * @param string $item_id Item identifier
 */
function nofilter_item($url, $item_id)
{
    $feed = get_feed($url, true);

    if ($feed === false) {
        return;
    }

    get_item($feed, $item_id);
}
/**
 * Run the scraper on a URL and print its log and results.
 *
 * Output order: logger messages, the relevant content, then the
 * filtered content, each of the last two under its own delimiter line.
 *
 * @param string $url Page URL
 */
function grabber($url)
{
    $scraper = new Scraper(new Config);
    $scraper->setUrl($url);
    $scraper->execute();

    print_r(Logger::getMessages());

    $relevant = $scraper->getRelevantContent();
    echo "============= CONTENT ================\n";
    echo $relevant.PHP_EOL;

    echo "============= FILTERED ================\n";
    echo $scraper->getFilteredContent().PHP_EOL;
}
// Parse command line arguments.
// Commands taking a feed URL and an item id need 4 argv entries, the
// others 3. A recognised command stops the script once it has run.
if ($argc === 4) {
    switch ($argv[1]) {
        case 'item':
            dump_item($argv[2], $argv[3]);
            die;
        case 'nofilter':
            nofilter_item($argv[2], $argv[3]);
            die;
    }
} elseif ($argc === 3) {
    switch ($argv[1]) {
        case 'feed':
            dump_feed($argv[2]);
            die;
        case 'debug':
            debug_feed($argv[2]);
            die;
        case 'grabber':
            grabber($argv[2]);
            die;
    }
}

// Unknown command or wrong argument count: show every command together
// with the arguments it expects.
printf("Usage:\n");
printf("%s feed <feed-url>\n", $argv[0]);
printf("%s debug <feed-url>\n", $argv[0]);
printf("%s item <feed-url> <item-id>\n", $argv[0]);
printf("%s nofilter <feed-url> <item-id>\n", $argv[0]);
printf("%s grabber <url>\n", $argv[0]);