pax_global_header00006660000000000000000000000064126562517220014522gustar00rootroot0000000000000052 comment=8f776343b0dada397c2a950a7c3f7be57442fa35 picoFeed-0.1.18/000077500000000000000000000000001265625172200133275ustar00rootroot00000000000000picoFeed-0.1.18/LICENSE000066400000000000000000000020731265625172200143360ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2015 Frederic Guillot Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. picoFeed-0.1.18/lib/000077500000000000000000000000001265625172200140755ustar00rootroot00000000000000picoFeed-0.1.18/lib/PicoFeed/000077500000000000000000000000001265625172200155535ustar00rootroot00000000000000picoFeed-0.1.18/lib/PicoFeed/Client/000077500000000000000000000000001265625172200167715ustar00rootroot00000000000000picoFeed-0.1.18/lib/PicoFeed/Client/Client.php000066400000000000000000000331141265625172200207220ustar00rootroot00000000000000request_headers = $headers; } /** * Perform the HTTP request. 
* * @param string $url URL * * @return Client */ public function execute($url = '') { if ($url !== '') { $this->url = $url; } Logger::setMessage(get_called_class().' Fetch URL: '.$this->url); Logger::setMessage(get_called_class().' Etag provided: '.$this->etag); Logger::setMessage(get_called_class().' Last-Modified provided: '.$this->last_modified); $response = $this->doRequest(); $this->status_code = $response['status']; $this->handleNotModifiedResponse($response); $this->handleNotFoundResponse($response); $this->handleNormalResponse($response); return $this; } /** * Handle not modified response. * * @param array $response Client response */ public function handleNotModifiedResponse(array $response) { if ($response['status'] == 304) { $this->is_modified = false; } elseif ($response['status'] == 200) { $this->is_modified = $this->hasBeenModified($response, $this->etag, $this->last_modified); $this->etag = $this->getHeader($response, 'ETag'); $this->last_modified = $this->getHeader($response, 'Last-Modified'); } if ($this->is_modified === false) { Logger::setMessage(get_called_class().' Resource not modified'); } } /** * Handle not found response. * * @param array $response Client response */ public function handleNotFoundResponse(array $response) { if ($response['status'] == 404) { throw new InvalidUrlException('Resource not found'); } } /** * Handle normal response. * * @param array $response Client response */ public function handleNormalResponse(array $response) { if ($response['status'] == 200) { $this->content = $response['body']; $this->content_type = $this->findContentType($response); $this->encoding = $this->findCharset(); } } /** * Check if a request has been modified according to the parameters. 
* * @param array $response * @param string $etag * @param string $lastModified * * @return bool */ private function hasBeenModified($response, $etag, $lastModified) { $headers = array( 'Etag' => $etag, 'Last-Modified' => $lastModified, ); // Compare the values for each header that is present $presentCacheHeaderCount = 0; foreach ($headers as $key => $value) { if (isset($response['headers'][$key])) { if ($response['headers'][$key] !== $value) { return true; } ++$presentCacheHeaderCount; } } // If at least one header is present and the values match, the response // was not modified if ($presentCacheHeaderCount > 0) { return false; } return true; } /** * Find content type from response headers. * * @param array $response Client response * * @return string */ public function findContentType(array $response) { return strtolower($this->getHeader($response, 'Content-Type')); } /** * Find charset from response headers. * * @return string */ public function findCharset() { $result = explode('charset=', $this->content_type); return isset($result[1]) ? $result[1] : ''; } /** * Get header value from a client response. * * @param array $response Client response * @param string $header Header name * * @return string */ public function getHeader(array $response, $header) { return isset($response['headers'][$header]) ? $response['headers'][$header] : ''; } /** * Set the Last-Modified HTTP header. * * @param string $last_modified Header value * * @return \PicoFeed\Client\Client */ public function setLastModified($last_modified) { $this->last_modified = $last_modified; return $this; } /** * Get the value of the Last-Modified HTTP header. * * @return string */ public function getLastModified() { return $this->last_modified; } /** * Set the value of the Etag HTTP header. * * @param string $etag Etag HTTP header value * * @return \PicoFeed\Client\Client */ public function setEtag($etag) { $this->etag = $etag; return $this; } /** * Get the Etag HTTP header value. 
* * @return string */ public function getEtag() { return $this->etag; } /** * Get the final url value. * * @return string */ public function getUrl() { return $this->url; } /** * Set the url. * * @return string * @return \PicoFeed\Client\Client */ public function setUrl($url) { $this->url = $url; return $this; } /** * Get the HTTP response status code. * * @return int */ public function getStatusCode() { return $this->status_code; } /** * Get the body of the HTTP response. * * @return string */ public function getContent() { return $this->content; } /** * Get the content type value from HTTP headers. * * @return string */ public function getContentType() { return $this->content_type; } /** * Get the encoding value from HTTP headers. * * @return string */ public function getEncoding() { return $this->encoding; } /** * Return true if the remote resource has changed. * * @return bool */ public function isModified() { return $this->is_modified; } /** * return true if passthrough mode is enabled. * * @return bool */ public function isPassthroughEnabled() { return $this->passthrough; } /** * Set connection timeout. * * @param int $timeout Connection timeout * * @return \PicoFeed\Client\Client */ public function setTimeout($timeout) { $this->timeout = $timeout ?: $this->timeout; return $this; } /** * Set a custom user agent. * * @param string $user_agent User Agent * * @return \PicoFeed\Client\Client */ public function setUserAgent($user_agent) { $this->user_agent = $user_agent ?: $this->user_agent; return $this; } /** * Set the maximum number of HTTP redirections. * * @param int $max Maximum * * @return \PicoFeed\Client\Client */ public function setMaxRedirections($max) { $this->max_redirects = $max ?: $this->max_redirects; return $this; } /** * Set the maximum size of the HTTP body. 
* * @param int $max Maximum * * @return \PicoFeed\Client\Client */ public function setMaxBodySize($max) { $this->max_body_size = $max ?: $this->max_body_size; return $this; } /** * Set the proxy hostname. * * @param string $hostname Proxy hostname * * @return \PicoFeed\Client\Client */ public function setProxyHostname($hostname) { $this->proxy_hostname = $hostname ?: $this->proxy_hostname; return $this; } /** * Set the proxy port. * * @param int $port Proxy port * * @return \PicoFeed\Client\Client */ public function setProxyPort($port) { $this->proxy_port = $port ?: $this->proxy_port; return $this; } /** * Set the proxy username. * * @param string $username Proxy username * * @return \PicoFeed\Client\Client */ public function setProxyUsername($username) { $this->proxy_username = $username ?: $this->proxy_username; return $this; } /** * Set the proxy password. * * @param string $password Password * * @return \PicoFeed\Client\Client */ public function setProxyPassword($password) { $this->proxy_password = $password ?: $this->proxy_password; return $this; } /** * Set the username. * * @param string $username Basic Auth username * * @return \PicoFeed\Client\Client */ public function setUsername($username) { $this->username = $username ?: $this->username; return $this; } /** * Set the password. * * @param string $password Basic Auth Password * * @return \PicoFeed\Client\Client */ public function setPassword($password) { $this->password = $password ?: $this->password; return $this; } /** * Enable the passthrough mode. * * @return \PicoFeed\Client\Client */ public function enablePassthroughMode() { $this->passthrough = true; return $this; } /** * Disable the passthrough mode. * * @return \PicoFeed\Client\Client */ public function disablePassthroughMode() { $this->passthrough = false; return $this; } /** * Set config object. 
* * @param \PicoFeed\Config\Config $config Config instance * * @return \PicoFeed\Client\Client */ public function setConfig(Config $config) { if ($config !== null) { $this->setTimeout($config->getClientTimeout()); $this->setUserAgent($config->getClientUserAgent()); $this->setMaxRedirections($config->getMaxRedirections()); $this->setMaxBodySize($config->getMaxBodySize()); $this->setProxyHostname($config->getProxyHostname()); $this->setProxyPort($config->getProxyPort()); $this->setProxyUsername($config->getProxyUsername()); $this->setProxyPassword($config->getProxyPassword()); } return $this; } /** * Return true if the HTTP status code is a redirection * * @access protected * @param integer $code * @return boolean */ public function isRedirection($code) { return $code == 301 || $code == 302 || $code == 303 || $code == 307; } } picoFeed-0.1.18/lib/PicoFeed/Client/ClientException.php000066400000000000000000000003071265625172200225770ustar00rootroot00000000000000body_length += $length; if ($this->body_length > $this->max_body_size) { return -1; } $this->body .= $buffer; return $length; } /** * cURL callback to read HTTP headers. * * @param resource $ch cURL handler * @param string $buffer Header line * * @return int Length of the buffer */ public function readHeaders($ch, $buffer) { $length = strlen($buffer); if ($buffer === "\r\n" || $buffer === "\n") { ++$this->response_headers_count; } else { if (!isset($this->response_headers[$this->response_headers_count])) { $this->response_headers[$this->response_headers_count] = ''; } $this->response_headers[$this->response_headers_count] .= $buffer; } return $length; } /** * cURL callback to passthrough the HTTP body to the client. 
* * If the function return -1, curl stop to read the HTTP response * * @param resource $ch cURL handler * @param string $buffer Chunk of data * * @return int Length of the buffer */ public function passthroughBody($ch, $buffer) { // do it only at the beginning of a transmission if ($this->body_length === 0) { list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1])); if ($this->isRedirection($status)) { return $this->handleRedirection($headers['Location']); } header($status); if (isset($headers['Content-Type'])) { header('Content-Type:' .$headers['Content-Type']); } } $length = strlen($buffer); $this->body_length += $length; echo $buffer; return $length; } /** * Prepare HTTP headers. * * @return string[] */ private function prepareHeaders() { $headers = array( 'Connection: close', ); if ($this->etag) { $headers[] = 'If-None-Match: '.$this->etag; } if ($this->last_modified) { $headers[] = 'If-Modified-Since: '.$this->last_modified; } $headers = array_merge($headers, $this->request_headers); return $headers; } /** * Prepare curl proxy context. * * @param resource $ch * * @return resource $ch */ private function prepareProxyContext($ch) { if ($this->proxy_hostname) { Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port); curl_setopt($ch, CURLOPT_PROXYPORT, $this->proxy_port); curl_setopt($ch, CURLOPT_PROXYTYPE, 'HTTP'); curl_setopt($ch, CURLOPT_PROXY, $this->proxy_hostname); if ($this->proxy_username) { Logger::setMessage(get_called_class().' Proxy credentials: Yes'); curl_setopt($ch, CURLOPT_PROXYUSERPWD, $this->proxy_username.':'.$this->proxy_password); } else { Logger::setMessage(get_called_class().' Proxy credentials: No'); } } return $ch; } /** * Prepare curl auth context. 
* * @param resource $ch * * @return resource $ch */ private function prepareAuthContext($ch) { if ($this->username && $this->password) { curl_setopt($ch, CURLOPT_USERPWD, $this->username.':'.$this->password); } return $ch; } /** * Set write/header functions. * * @param resource $ch * * @return resource $ch */ private function prepareDownloadMode($ch) { $write_function = 'readBody'; $header_function = 'readHeaders'; if ($this->isPassthroughEnabled()) { $write_function = 'passthroughBody'; } curl_setopt($ch, CURLOPT_WRITEFUNCTION, array($this, $write_function)); curl_setopt($ch, CURLOPT_HEADERFUNCTION, array($this, $header_function)); return $ch; } /** * Prepare curl context. * * @return resource */ private function prepareContext() { $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $this->url); curl_setopt($ch, CURLOPT_HTTP_VERSION, CURL_HTTP_VERSION_1_1); curl_setopt($ch, CURLOPT_TIMEOUT, $this->timeout); curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $this->timeout); curl_setopt($ch, CURLOPT_USERAGENT, $this->user_agent); curl_setopt($ch, CURLOPT_HTTPHEADER, $this->prepareHeaders()); curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); curl_setopt($ch, CURLOPT_ENCODING, ''); curl_setopt($ch, CURLOPT_COOKIEJAR, 'php://memory'); curl_setopt($ch, CURLOPT_COOKIEFILE, 'php://memory'); // Disable SSLv3 by enforcing TLSv1.x for curl >= 7.34.0 and < 7.39.0. // Versions prior to 7.34 and at least when compiled against openssl // interpret this parameter as "limit to TLSv1.0" which fails for sites // which enforce TLS 1.1+. // Starting with curl 7.39.0 SSLv3 is disabled by default. $version = curl_version(); if ($version['version_number'] >= 467456 && $version['version_number'] < 468736) { curl_setopt($ch, CURLOPT_SSLVERSION, 1); } $ch = $this->prepareDownloadMode($ch); $ch = $this->prepareProxyContext($ch); $ch = $this->prepareAuthContext($ch); return $ch; } /** * Execute curl context. 
*/ private function executeContext() { $ch = $this->prepareContext(); curl_exec($ch); Logger::setMessage(get_called_class().' cURL total time: '.curl_getinfo($ch, CURLINFO_TOTAL_TIME)); Logger::setMessage(get_called_class().' cURL dns lookup time: '.curl_getinfo($ch, CURLINFO_NAMELOOKUP_TIME)); Logger::setMessage(get_called_class().' cURL connect time: '.curl_getinfo($ch, CURLINFO_CONNECT_TIME)); Logger::setMessage(get_called_class().' cURL speed download: '.curl_getinfo($ch, CURLINFO_SPEED_DOWNLOAD)); Logger::setMessage(get_called_class().' cURL effective url: '.curl_getinfo($ch, CURLINFO_EFFECTIVE_URL)); $curl_errno = curl_errno($ch); if ($curl_errno) { Logger::setMessage(get_called_class().' cURL error: '.curl_error($ch)); curl_close($ch); $this->handleError($curl_errno); } // Update the url if there where redirects $this->url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); curl_close($ch); } /** * Do the HTTP request. * * @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...] 
*/ public function doRequest() { $this->executeContext(); list($status, $headers) = HttpHeaders::parse(explode("\n", $this->response_headers[$this->response_headers_count - 1])); if ($this->isRedirection($status)) { return $this->handleRedirection($headers['Location']); } return array( 'status' => $status, 'body' => $this->body, 'headers' => $headers, ); } /** * Handle HTTP redirects * * @param string $location Redirected URL * * @return array */ private function handleRedirection($location) { $nb_redirects = 0; $result = array(); $this->url = Url::resolve($location, $this->url); $this->body = ''; $this->body_length = 0; $this->response_headers = array(); $this->response_headers_count = 0; while (true) { ++$nb_redirects; if ($nb_redirects >= $this->max_redirects) { throw new MaxRedirectException('Maximum number of redirections reached'); } $result = $this->doRequest(); if ($this->isRedirection($result['status'])) { $this->url = Url::resolve($result['headers']['Location'], $this->url); $this->body = ''; $this->body_length = 0; $this->response_headers = array(); $this->response_headers_count = 0; } else { break; } } return $result; } /** * Handle cURL errors (throw individual exceptions). 
* * We don't use constants because they are not necessary always available * (depends of the version of libcurl linked to php) * * @see http://curl.haxx.se/libcurl/c/libcurl-errors.html * * @param int $errno cURL error code */ private function handleError($errno) { switch ($errno) { case 78: // CURLE_REMOTE_FILE_NOT_FOUND throw new InvalidUrlException('Resource not found'); case 6: // CURLE_COULDNT_RESOLVE_HOST throw new InvalidUrlException('Unable to resolve hostname'); case 7: // CURLE_COULDNT_CONNECT throw new InvalidUrlException('Unable to connect to the remote host'); case 23: // CURLE_WRITE_ERROR throw new MaxSizeException('Maximum response size exceeded'); case 28: // CURLE_OPERATION_TIMEDOUT throw new TimeoutException('Operation timeout'); case 35: // CURLE_SSL_CONNECT_ERROR case 51: // CURLE_PEER_FAILED_VERIFICATION case 58: // CURLE_SSL_CERTPROBLEM case 60: // CURLE_SSL_CACERT case 59: // CURLE_SSL_CIPHER case 64: // CURLE_USE_SSL_FAILED case 66: // CURLE_SSL_ENGINE_INITFAILED case 77: // CURLE_SSL_CACERT_BADFILE case 83: // CURLE_SSL_ISSUER_ERROR throw new InvalidCertificateException('Invalid SSL certificate'); case 47: // CURLE_TOO_MANY_REDIRECTS throw new MaxRedirectException('Maximum number of redirections reached'); case 63: // CURLE_FILESIZE_EXCEEDED throw new MaxSizeException('Maximum response size exceeded'); default: throw new InvalidUrlException('Unable to fetch the URL'); } } } picoFeed-0.1.18/lib/PicoFeed/Client/HttpHeaders.php000066400000000000000000000034621265625172200217220ustar00rootroot00000000000000 $value) { $this->headers[strtolower($key)] = $value; } } public function offsetGet($offset) { return $this->headers[strtolower($offset)]; } public function offsetSet($offset, $value) { $this->headers[strtolower($offset)] = $value; } public function offsetExists($offset) { return isset($this->headers[strtolower($offset)]); } public function offsetUnset($offset) { unset($this->headers[strtolower($offset)]); } /** * Parse HTTP headers. 
* * @static * * @param array $lines List of headers * * @return array */ public static function parse(array $lines) { $status = 0; $headers = array(); foreach ($lines as $line) { if (strpos($line, 'HTTP/1') === 0) { $headers = array(); $status = (int) substr($line, 9, 3); } elseif (strpos($line, ': ') !== false) { list($name, $value) = explode(': ', $line); if ($value) { $headers[trim($name)] = trim($value); } } } Logger::setMessage(get_called_class().' HTTP status code: '.$status); foreach ($headers as $name => $value) { Logger::setMessage(get_called_class().' HTTP header: '.$name.' => '.$value); } return array($status, new self($headers)); } } picoFeed-0.1.18/lib/PicoFeed/Client/InvalidCertificateException.php000066400000000000000000000002631265625172200251130ustar00rootroot00000000000000user_agent, ); // disable compression in passthrough mode. It could result in double // compressed content which isn't decodeable by browsers if (function_exists('gzdecode') && !$this->isPassthroughEnabled()) { $headers[] = 'Accept-Encoding: gzip'; } if ($this->etag) { $headers[] = 'If-None-Match: '.$this->etag; } if ($this->last_modified) { $headers[] = 'If-Modified-Since: '.$this->last_modified; } if ($this->proxy_username) { $headers[] = 'Proxy-Authorization: Basic '.base64_encode($this->proxy_username.':'.$this->proxy_password); } if ($this->username && $this->password) { $headers[] = 'Authorization: Basic '.base64_encode($this->username.':'.$this->password); } $headers = array_merge($headers, $this->request_headers); return $headers; } /** * Construct the final URL from location headers. * * @param array $headers List of HTTP response header */ private function setEffectiveUrl($headers) { foreach ($headers as $header) { if (stripos($header, 'Location') === 0) { list(, $value) = explode(': ', $header); $this->url = Url::resolve($value, $this->url); } } } /** * Prepare stream context. 
* * @return array */ private function prepareContext() { $context = array( 'http' => array( 'method' => 'GET', 'protocol_version' => 1.1, 'timeout' => $this->timeout, 'max_redirects' => $this->max_redirects, ), ); if ($this->proxy_hostname) { Logger::setMessage(get_called_class().' Proxy: '.$this->proxy_hostname.':'.$this->proxy_port); $context['http']['proxy'] = 'tcp://'.$this->proxy_hostname.':'.$this->proxy_port; $context['http']['request_fulluri'] = true; if ($this->proxy_username) { Logger::setMessage(get_called_class().' Proxy credentials: Yes'); } else { Logger::setMessage(get_called_class().' Proxy credentials: No'); } } $context['http']['header'] = implode("\r\n", $this->prepareHeaders()); return $context; } /** * Do the HTTP request. * * @return array HTTP response ['body' => ..., 'status' => ..., 'headers' => ...] */ public function doRequest() { $body = ''; // Create context $context = stream_context_create($this->prepareContext()); // Make HTTP request $stream = @fopen($this->url, 'r', false, $context); if (!is_resource($stream)) { throw new InvalidUrlException('Unable to establish a connection'); } // Get HTTP headers response $metadata = stream_get_meta_data($stream); list($status, $headers) = HttpHeaders::parse($metadata['wrapper_data']); if ($this->isPassthroughEnabled()) { header(':', true, $status); if (isset($headers['Content-Type'])) { header('Content-Type: '.$headers['Content-Type']); } fpassthru($stream); } else { // Get the entire body until the max size $body = stream_get_contents($stream, $this->max_body_size + 1); // If the body size is too large abort everything if (strlen($body) > $this->max_body_size) { throw new MaxSizeException('Content size too large'); } if ($metadata['timed_out']) { throw new TimeoutException('Operation timeout'); } } fclose($stream); $this->setEffectiveUrl($metadata['wrapper_data']); return array( 'status' => $status, 'body' => $this->decodeBody($body, $headers), 'headers' => $headers, ); } /** * Decode body 
response according to the HTTP headers. * * @param string $body Raw body * @param HttpHeaders $headers HTTP headers * * @return string */ public function decodeBody($body, HttpHeaders $headers) { if (isset($headers['Transfer-Encoding']) && $headers['Transfer-Encoding'] === 'chunked') { $body = $this->decodeChunked($body); } if (isset($headers['Content-Encoding']) && $headers['Content-Encoding'] === 'gzip') { $body = gzdecode($body); } return $body; } /** * Decode a chunked body. * * @param string $str Raw body * * @return string Decoded body */ public function decodeChunked($str) { for ($result = ''; !empty($str); $str = trim($str)) { // Get the chunk length $pos = strpos($str, "\r\n"); $len = hexdec(substr($str, 0, $pos)); // Append the chunk to the result $result .= substr($str, $pos + 2, $len); $str = substr($str, $pos + 2 + $len); } return $result; } } picoFeed-0.1.18/lib/PicoFeed/Client/TimeoutException.php000066400000000000000000000002351265625172200230070ustar00rootroot00000000000000url = $url; $this->components = parse_url($url) ?: array(); // Issue with PHP < 5.4.7 and protocol relative url if (version_compare(PHP_VERSION, '5.4.7', '<') && $this->isProtocolRelative()) { $pos = strpos($this->components['path'], '/', 2); if ($pos === false) { $pos = strlen($this->components['path']); } $this->components['host'] = substr($this->components['path'], 2, $pos - 2); $this->components['path'] = substr($this->components['path'], $pos); } } /** * Shortcut method to get an absolute url from relative url. * * @static * * @param mixed $item_url Unknown url (can be relative or not) * @param mixed $website_url Website url * * @return string */ public static function resolve($item_url, $website_url) { $link = is_string($item_url) ? new self($item_url) : $item_url; $website = is_string($website_url) ? 
new self($website_url) : $website_url; if ($link->isRelativeUrl()) { if ($link->isRelativePath()) { return $link->getAbsoluteUrl($website->getBaseUrl($website->getBasePath())); } return $link->getAbsoluteUrl($website->getBaseUrl()); } elseif ($link->isProtocolRelative()) { $link->setScheme($website->getScheme()); } return $link->getAbsoluteUrl(); } /** * Shortcut method to get a base url. * * @static * * @param string $url * * @return string */ public static function base($url) { $link = new self($url); return $link->getBaseUrl(); } /** * Get the base URL. * * @param string $suffix Add a suffix to the url * * @return string */ public function getBaseUrl($suffix = '') { return $this->hasHost() ? $this->getScheme('://').$this->getHost().$this->getPort(':').$suffix : ''; } /** * Get the absolute URL. * * @param string $base_url Use this url as base url * * @return string */ public function getAbsoluteUrl($base_url = '') { if ($base_url) { $base = new self($base_url); $url = $base->getAbsoluteUrl().substr($this->getFullPath(), 1); } else { $url = $this->hasHost() ? $this->getBaseUrl().$this->getFullPath() : ''; } return $url; } /** * Return true if the url is relative. * * @return bool */ public function isRelativeUrl() { return !$this->hasScheme() && !$this->isProtocolRelative(); } /** * Return true if the path is relative. * * @return bool */ public function isRelativePath() { $path = $this->getPath(); return empty($path) || $path{0} !== '/'; } /** * Filters the path of a URI. * * Imported from Guzzle library: https://github.com/guzzle/psr7/blob/master/src/Uri.php#L568-L582 * * @param $path * * @return string */ public function filterPath($path, $charUnreserved = 'a-zA-Z0-9_\-\.~', $charSubDelims = '!\$&\'\(\)\*\+,;=') { return preg_replace_callback( '/(?:[^'.$charUnreserved.$charSubDelims.':@\/%]+|%(?![A-Fa-f0-9]{2}))/', function (array $matches) { return rawurlencode($matches[0]); }, $path ); } /** * Get the path. 
* * @return string */ public function getPath() { return $this->filterPath(empty($this->components['path']) ? '' : $this->components['path']); } /** * Get the base path. * * @return string */ public function getBasePath() { $current_path = $this->getPath(); $path = $this->isRelativePath() ? '/' : ''; $path .= substr($current_path, -1) === '/' ? $current_path : dirname($current_path); return preg_replace('/\\\\\/|\/\//', '/', $path.'/'); } /** * Get the full path (path + querystring + fragment). * * @return string */ public function getFullPath() { $path = $this->isRelativePath() ? '/' : ''; $path .= $this->getPath(); $path .= empty($this->components['query']) ? '' : '?'.$this->components['query']; $path .= empty($this->components['fragment']) ? '' : '#'.$this->components['fragment']; return $path; } /** * Get the hostname. * * @return string */ public function getHost() { return empty($this->components['host']) ? '' : $this->components['host']; } /** * Return true if the url has a hostname. * * @return bool */ public function hasHost() { return !empty($this->components['host']); } /** * Get the scheme. * * @param string $suffix Suffix to add when there is a scheme * * @return string */ public function getScheme($suffix = '') { return ($this->hasScheme() ? $this->components['scheme'] : 'http').$suffix; } /** * Set the scheme. * * @param string $scheme Set a scheme * * @return string */ public function setScheme($scheme) { $this->components['scheme'] = $scheme; } /** * Return true if the url has a scheme. * * @return bool */ public function hasScheme() { return !empty($this->components['scheme']); } /** * Get the port. * * @param string $prefix Prefix to add when there is a port * * @return string */ public function getPort($prefix = '') { return $this->hasPort() ? $prefix.$this->components['port'] : ''; } /** * Return true if the url has a port. 
* * @return bool */ public function hasPort() { return !empty($this->components['port']); } /** * Return true if the url is protocol relative (start with //). * * @return bool */ public function isProtocolRelative() { return strpos($this->url, '//') === 0; } } picoFeed-0.1.18/lib/PicoFeed/Config/000077500000000000000000000000001265625172200167605ustar00rootroot00000000000000picoFeed-0.1.18/lib/PicoFeed/Config/Config.php000066400000000000000000000102221265625172200206730ustar00rootroot00000000000000container[$parameter] = $arguments[0]; return $this; } elseif ($prefix === 'get') { $default_value = isset($arguments[0]) ? $arguments[0] : null; return isset($this->container[$parameter]) ? $this->container[$parameter] : $default_value; } } } picoFeed-0.1.18/lib/PicoFeed/Encoding/000077500000000000000000000000001265625172200173015ustar00rootroot00000000000000picoFeed-0.1.18/lib/PicoFeed/Encoding/Encoding.php000066400000000000000000000015001265625172200215340ustar00rootroot00000000000000 array('controls', 'src'), 'video' => array('poster', 'controls', 'height', 'width', 'src'), 'source' => array('src', 'type'), 'dt' => array(), 'dd' => array(), 'dl' => array(), 'table' => array(), 'caption' => array(), 'tr' => array(), 'th' => array(), 'td' => array(), 'tbody' => array(), 'thead' => array(), 'h2' => array(), 'h3' => array(), 'h4' => array(), 'h5' => array(), 'h6' => array(), 'strong' => array(), 'em' => array(), 'code' => array(), 'pre' => array(), 'blockquote' => array(), 'p' => array(), 'ul' => array(), 'li' => array(), 'ol' => array(), 'br' => array(), 'del' => array(), 'a' => array('href'), 'img' => array('src', 'title', 'alt'), 'figure' => array(), 'figcaption' => array(), 'cite' => array(), 'time' => array('datetime'), 'abbr' => array('title'), 'iframe' => array('width', 'height', 'frameborder', 'src', 'allowfullscreen'), 'q' => array('cite'), ); /** * Scheme whitelist. 
* * For a complete list go to http://en.wikipedia.org/wiki/URI_scheme * * @var array */ private $scheme_whitelist = array( 'bitcoin:', 'callto:', 'ed2k://', 'facetime://', 'feed:', 'ftp://', 'geo:', 'git://', 'http://', 'https://', 'irc://', 'irc6://', 'ircs://', 'jabber:', 'magnet:', 'mailto:', 'nntp://', 'rtmp://', 'sftp://', 'sip:', 'sips:', 'skype:', 'smb://', 'sms:', 'spotify:', 'ssh:', 'steam:', 'svn://', 'tel:', ); /** * Iframe source whitelist, everything else is ignored. * * @var array */ private $iframe_whitelist = array( 'http://www.youtube.com', 'https://www.youtube.com', 'http://player.vimeo.com', 'https://player.vimeo.com', 'http://www.dailymotion.com', 'https://www.dailymotion.com', 'http://vk.com', 'https://vk.com', ); /** * Blacklisted resources. * * @var array */ private $media_blacklist = array( 'api.flattr.com', 'feeds.feedburner.com', 'share.feedsportal.com', 'da.feedsportal.com', 'rc.feedsportal.com', 'rss.feedsportal.com', 'res.feedsportal.com', 'res1.feedsportal.com', 'res2.feedsportal.com', 'res3.feedsportal.com', 'pi.feedsportal.com', 'rss.nytimes.com', 'feeds.wordpress.com', 'stats.wordpress.com', 'rss.cnn.com', 'twitter.com/home?status=', 'twitter.com/share', 'twitter_icon_large.png', 'www.facebook.com/sharer.php', 'facebook_icon_large.png', 'plus.google.com/share', 'www.gstatic.com/images/icons/gplus-16.png', 'www.gstatic.com/images/icons/gplus-32.png', 'www.gstatic.com/images/icons/gplus-64.png', ); /** * Attributes used for external resources. * * @var array */ private $media_attributes = array( 'src', 'href', 'poster', ); /** * Attributes that must be integer. * * @var array */ private $integer_attributes = array( 'width', 'height', 'frameborder', ); /** * Mandatory attributes for specified tags. * * @var array */ private $required_attributes = array( 'a' => array('href'), 'img' => array('src'), 'iframe' => array('src'), 'audio' => array('src'), 'source' => array('src'), ); /** * Add attributes to specified tags. 
*
     * @var array
     */
    private $add_attributes = array(
        'a' => array('rel' => 'noreferrer', 'target' => '_blank'),
        'video' => array('controls' => 'true'),
    );

    /**
     * List of filters to apply.
     *
     * Each entry is the name of a method of this class that receives
     * ($tag, $attribute, $value) and returns false when the attribute must be
     * dropped. The filters are executed in the order listed here.
     *
     * @var array
     */
    private $filters = array(
        'filterAllowedAttribute',
        'filterIntegerAttribute',
        'rewriteAbsoluteUrl',
        'filterIframeAttribute',
        'filterBlacklistResourceAttribute',
        'filterProtocolUrlAttribute',
        'rewriteImageProxyUrl',
        'secureIframeSrc',
        'removeYouTubeAutoplay',
    );

    /**
     * Website url instance.
     *
     * @var \PicoFeed\Client\Url
     */
    private $website;

    /**
     * Constructor.
     *
     * @param \PicoFeed\Client\Url $website Website url instance
     */
    public function __construct(Url $website)
    {
        $this->website = $website;
    }

    /**
     * Apply filters to the attributes list.
     *
     * An attribute is kept only when every filter accepts it. Filters that
     * declare $value by reference may rewrite it; the rewritten value is the
     * one returned.
     *
     * @param string $tag        Tag name
     * @param array  $attributes Attributes dictionary
     *
     * @return array Filtered attributes
     */
    public function filter($tag, array $attributes)
    {
        // Build a new array instead of iterating by reference: no dangling
        // reference is left behind and the input array is never mutated mid-loop.
        $filtered = array();

        foreach ($attributes as $attribute => $value) {
            foreach ($this->filters as $filter) {
                if (!$this->$filter($tag, $attribute, $value)) {
                    // Rejected: skip the remaining filters and drop the attribute
                    continue 2;
                }
            }

            $filtered[$attribute] = $value;
        }

        return $filtered;
    }

    /**
     * Return true if the attribute is whitelisted for the given tag
     * (attributes that are not allowed are removed).
     *
     * @param string $tag       Tag name
     * @param string $attribute Attribute name
     * @param string $value     Attribute value
     *
     * @return bool
     */
    public function filterAllowedAttribute($tag, $attribute, $value)
    {
        return isset($this->attribute_whitelist[$tag])
            && in_array($attribute, $this->attribute_whitelist[$tag], true);
    }

    /**
     * Return false if the attribute must be an integer but its value is not
     * made of digits only; return true in every other case.
     *
     * @param string $tag       Tag name
     * @param string $attribute Attribute name
     * @param string $value     Attribute value
     *
     * @return bool
     */
    public function filterIntegerAttribute($tag, $attribute, $value)
    {
        if (in_array($attribute, $this->integer_attributes, true)) {
            // ctype_digit() only gives a meaningful answer for strings
            return ctype_digit((string) $value);
        }

        return true;
    }

    /**
     * Return true if the iframe source is allowed (remove not allowed iframe).
*
     * The value must start with one of the whitelisted urls, and that prefix
     * must end on a url boundary, otherwise "https://vk.com.evil.example/" or
     * "https://vk.com@evil.example/" would be accepted for "https://vk.com".
     *
     * @param string $tag       Tag name
     * @param string $attribute Attribute name
     * @param string $value     Attribute value
     *
     * @return bool
     */
    public function filterIframeAttribute($tag, $attribute, $value)
    {
        if ($tag !== 'iframe' || $attribute !== 'src') {
            return true;
        }

        foreach ($this->iframe_whitelist as $url) {
            if ($url === '' || strpos($value, $url) !== 0) {
                continue;
            }

            // Character that immediately follows the whitelisted prefix ('' when the value ends there)
            $next = (string) substr($value, strlen($url), 1);

            // Accept when the value is exactly the prefix, when the prefix itself ends
            // with a delimiter, or when a path, query or fragment starts right after it.
            if ($next === ''
                || strpos('/?#', substr($url, -1)) !== false
                || strpos('/?#', $next) !== false
            ) {
                return true;
            }
        }

        return false;
    }

    /**
     * Return true if the resource is not blacklisted (remove blacklisted resource attributes).
     *
     * @param string $tag       Tag name
     * @param string $attribute Attribute name
     * @param string $value     Attribute value
     *
     * @return bool
     */
    public function filterBlacklistResourceAttribute($tag, $attribute, $value)
    {
        return !($this->isResource($attribute) && $this->isBlacklistedMedia($value));
    }

    /**
     * Convert all relative links to absolute url.
     *
     * @param string $tag       Tag name
     * @param string $attribute Attribute name
     * @param string $value     Attribute value (rewritten in place)
     *
     * @return bool Always true: this filter never removes the attribute
     */
    public function rewriteAbsoluteUrl($tag, $attribute, &$value)
    {
        if ($this->isResource($attribute)) {
            $value = Url::resolve($value, $this->website);
        }

        return true;
    }

    /**
     * Turns iframes' src attribute from http to https to prevent
     * mixed active content.
     *
     * @param string $tag       Tag name
     * @param string $attribute Attribute name
     * @param string $value     Attribute value (rewritten in place)
     *
     * @return bool Always true: this filter never removes the attribute
     */
    public function secureIframeSrc($tag, $attribute, &$value)
    {
        if ($tag === 'iframe' && $attribute === 'src' && strpos($value, 'http://') === 0) {
            // Insert the "s" right after "http"
            $value = substr_replace($value, 's', 4, 0);
        }

        return true;
    }

    /**
     * Removes YouTube autoplay from iframes.
*
     * Only https:// YouTube urls are matched; "autoplay=1" in the query string
     * is rewritten to "autoplay=0".
     *
     * @param string $tag       Tag name
     * @param string $attribute Attribute name
     * @param string $value     Attribute value (rewritten in place)
     *
     * @return bool Always true: this filter never removes the attribute
     */
    public function removeYouTubeAutoplay($tag, $attribute, &$value)
    {
        if ($tag !== 'iframe' || $attribute !== 'src') {
            return true;
        }

        // The dot of "youtube.com" is escaped so that it only matches a literal dot
        $regex = '%^(https://(?:www\.)?youtube\.com/.*\?.*autoplay=)(1)(.*)%i';

        // preg_replace() returns the subject untouched when nothing matches and
        // null on failure; the original value is kept in that last case.
        $rewritten = preg_replace($regex, '${1}0$3', $value);

        if ($rewritten !== null) {
            $value = $rewritten;
        }

        return true;
    }

    /**
     * Rewrite image url to use with a proxy.
     *
     * The proxy url template takes precedence over the proxy callback.
     *
     * @param string $tag       Tag name
     * @param string $attribute Attribute name
     * @param string $value     Attribute value (rewritten in place)
     *
     * @return bool Always true: this filter never removes the attribute
     */
    public function rewriteImageProxyUrl($tag, $attribute, &$value)
    {
        if ($tag !== 'img' || $attribute !== 'src') {
            return true;
        }

        // When a protocol restriction is set, only urls using that protocol are proxied
        if ($this->image_proxy_limit_protocol !== ''
            && stripos($value, $this->image_proxy_limit_protocol.':') !== 0
        ) {
            return true;
        }

        if ($this->image_proxy_url) {
            $value = sprintf($this->image_proxy_url, rawurlencode($value));
        } elseif (is_callable($this->image_proxy_callback)) {
            $value = call_user_func($this->image_proxy_callback, $value);
        }

        return true;
    }

    /**
     * Return true if the scheme is authorized.
     *
     * @param string $tag       Tag name
     * @param string $attribute Attribute name
     * @param string $value     Attribute value
     *
     * @return bool
     */
    public function filterProtocolUrlAttribute($tag, $attribute, $value)
    {
        return !($this->isResource($attribute) && !$this->isAllowedProtocol($value));
    }

    /**
     * Automatically add/override some attributes for specific tags.
     *
     * Configured attributes replace any value already present for the same
     * attribute name; attributes that are not configured are left untouched.
     *
     * @param string $tag        Tag name
     * @param array  $attributes Attributes list
     *
     * @return array
     */
    public function addAttributes($tag, array $attributes)
    {
        if (isset($this->add_attributes[$tag])) {
            // "+=" would keep the existing value, so the documented override never happened
            $attributes = array_replace($attributes, $this->add_attributes[$tag]);
        }

        return $attributes;
    }

    /**
     * Return true if all required attributes are present.
*
     * Tags without any configured required attribute are always accepted.
     *
     * @param string $tag        Tag name
     * @param array  $attributes Attributes list
     *
     * @return bool
     */
    public function hasRequiredAttributes($tag, array $attributes)
    {
        if (isset($this->required_attributes[$tag])) {
            foreach ($this->required_attributes[$tag] as $attribute) {
                if (!isset($attributes[$attribute])) {
                    return false;
                }
            }
        }

        return true;
    }

    /**
     * Check if an attribute name is an external resource.
     *
     * @param string $attribute Attribute name
     *
     * @return bool
     */
    public function isResource($attribute)
    {
        return in_array($attribute, $this->media_attributes, true);
    }

    /**
     * Detect if the protocol is allowed or not.
     *
     * The value must start with one of the whitelisted schemes. The comparison
     * is case-insensitive because uri schemes are case-insensitive
     * ("HTTPS://example.org" is a valid https url).
     *
     * @param string $value Attribute value
     *
     * @return bool
     */
    public function isAllowedProtocol($value)
    {
        foreach ($this->scheme_whitelist as $protocol) {
            if (stripos($value, $protocol) === 0) {
                return true;
            }
        }

        return false;
    }

    /**
     * Detect if an url is blacklisted.
     *
     * A resource is blacklisted as soon as it contains one of the blacklist entries.
     *
     * @param string $resource Attribute value (URL)
     *
     * @return bool
     */
    public function isBlacklistedMedia($resource)
    {
        foreach ($this->media_blacklist as $name) {
            if (strpos($resource, $name) !== false) {
                return true;
            }
        }

        return false;
    }

    /**
     * Convert the attribute list to html.
     *
     * @param array $attributes Attributes
     *
     * @return string Space separated list of name="escaped value" pairs
     */
    public function toHtml(array $attributes)
    {
        $html = array();

        foreach ($attributes as $attribute => $value) {
            $html[] = sprintf('%s="%s"', $attribute, Filter::escape($value));
        }

        return implode(' ', $html);
    }

    /**
     * Set whitelisted tags and attributes for each tag.
     *
     * An empty list leaves the current whitelist unchanged.
     *
     * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']]
     *
     * @return Attribute
     */
    public function setWhitelistedAttributes(array $values)
    {
        $this->attribute_whitelist = $values ?: $this->attribute_whitelist;

        return $this;
    }

    /**
     * Set scheme whitelist.
*
     * An empty list leaves the current whitelist unchanged.
     *
     * @param array $values List of scheme: ['http://', 'ftp://']
     *
     * @return Attribute
     */
    public function setSchemeWhitelist(array $values)
    {
        if (!empty($values)) {
            $this->scheme_whitelist = $values;
        }

        return $this;
    }

    /**
     * Set media attributes (used to load external resources).
     *
     * An empty list leaves the current value unchanged.
     *
     * @param array $values List of values: ['src', 'href']
     *
     * @return Attribute
     */
    public function setMediaAttributes(array $values)
    {
        if (!empty($values)) {
            $this->media_attributes = $values;
        }

        return $this;
    }

    /**
     * Set blacklisted external resources.
     *
     * An empty list leaves the current blacklist unchanged.
     *
     * @param array $values List of url fragments: ['api.flattr.com', '...']
     *
     * @return Attribute
     */
    public function setMediaBlacklist(array $values)
    {
        if (!empty($values)) {
            $this->media_blacklist = $values;
        }

        return $this;
    }

    /**
     * Set mandatory attributes for whitelisted tags.
     *
     * An empty list leaves the current value unchanged.
     *
     * @param array $values List of tags: ['img' => ['src']]
     *
     * @return Attribute
     */
    public function setRequiredAttributes(array $values)
    {
        if (!empty($values)) {
            $this->required_attributes = $values;
        }

        return $this;
    }

    /**
     * Set attributes to add automatically to specific tags.
     *
     * An empty list leaves the current value unchanged.
     *
     * @param array $values List of tags: ['a' => ['target' => '_blank']]
     *
     * @return Attribute
     */
    public function setAttributeOverrides(array $values)
    {
        if (!empty($values)) {
            $this->add_attributes = $values;
        }

        return $this;
    }

    /**
     * Set attributes that must be an integer.
     *
     * An empty list leaves the current value unchanged.
     *
     * @param array $values List of attributes: ['width', 'height']
     *
     * @return Attribute
     */
    public function setIntegerAttributes(array $values)
    {
        if (!empty($values)) {
            $this->integer_attributes = $values;
        }

        return $this;
    }

    /**
     * Set allowed iframe resources.
     *
     * An empty list leaves the current whitelist unchanged.
     *
     * @param array $values List of urls: ['http://www.youtube.com']
     *
     * @return Attribute
     */
    public function setIframeWhitelist(array $values)
    {
        if (!empty($values)) {
            $this->iframe_whitelist = $values;
        }

        return $this;
    }

    /**
     * Set image proxy URL.
* * The original image url will be urlencoded * * @param string $url Proxy URL * * @return Attribute */ public function setImageProxyUrl($url) { $this->image_proxy_url = $url ?: $this->image_proxy_url; return $this; } /** * Set image proxy callback. * * @param \Closure $callback * * @return Attribute */ public function setImageProxyCallback($callback) { $this->image_proxy_callback = $callback ?: $this->image_proxy_callback; return $this; } /** * Set image proxy protocol restriction. * * @param string $value * * @return Attribute */ public function setImageProxyProtocol($value) { $this->image_proxy_limit_protocol = $value ?: $this->image_proxy_limit_protocol; return $this; } } picoFeed-0.1.18/lib/PicoFeed/Filter/Filter.php000066400000000000000000000073551265625172200207500ustar00rootroot00000000000000]*>\s*~i', '', $data); } /** * Remove the XML tag from a document. * * @static * * @param string $data Input data * * @return string */ public static function stripXmlTag($data) { if (strpos($data, '') + 2)); } do { $pos = strpos($data, '') + 2)); } } while ($pos !== false && $pos < 200); return $data; } /** * Strip head tag from the HTML content. * * @static * * @param string $data Input data * * @return string */ public static function stripHeadTags($data) { return preg_replace('@]*?>.*?@siu', '', $data); } /** * Trim whitespace from the begining, the end and inside a string and don't break utf-8 string. * * @static * * @param string $value Raw data * * @return string Normalized data */ public static function stripWhiteSpace($value) { $value = str_replace("\r", ' ', $value); $value = str_replace("\t", ' ', $value); $value = str_replace("\n", ' ', $value); // $value = preg_replace('/\s+/', ' ', $value); <= break utf-8 return trim($value); } /** * Fixes before XML parsing. 
* * @static * * @param string $data Raw data * * @return string Normalized data */ public static function normalizeData($data) { $entities = array( '/(&#)(\d+);/m', // decimal encoded '/(&#x)([a-f0-9]+);/mi', // hex encoded ); // strip invalid XML 1.0 characters which are encoded as entities $data = preg_replace_callback($entities, function ($matches) { $code_point = $matches[2]; // convert hex entity to decimal if (strtolower($matches[1]) === '&#x') { $code_point = hexdec($code_point); } $code_point = (int) $code_point; // replace invalid characters if ($code_point < 9 || ($code_point > 10 && $code_point < 13) || ($code_point > 13 && $code_point < 32) || ($code_point > 55295 && $code_point < 57344) || ($code_point > 65533 && $code_point < 65536) || $code_point > 1114111 ) { return ''; }; return $matches[0]; }, $data); // strip every utf-8 character than isn't in the range of valid XML 1.0 characters return (string) preg_replace('/[^\x{0009}\x{000A}\x{000D}\x{0020}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]/u', '', $data); } } picoFeed-0.1.18/lib/PicoFeed/Filter/Html.php000066400000000000000000000147171265625172200204270ustar00rootroot00000000000000config = new Config(); $this->input = XmlParser::htmlToXml($html); $this->output = ''; $this->tag = new Tag($this->config); $this->website = $website; $this->attribute = new Attribute(new Url($website)); } /** * Set config object. 
*
     * When a config is given, its filter settings are pushed to the attribute
     * filter and to the tag filter.
     *
     * @param \PicoFeed\Config\Config $config Config instance
     *
     * @return \PicoFeed\Filter\Html
     */
    public function setConfig($config)
    {
        $this->config = $config;

        if ($this->config === null) {
            return $this;
        }

        // Every Attribute setter returns the Attribute instance, so the calls can be chained
        $this->attribute
            ->setImageProxyCallback($this->config->getFilterImageProxyCallback())
            ->setImageProxyUrl($this->config->getFilterImageProxyUrl())
            ->setImageProxyProtocol($this->config->getFilterImageProxyProtocol())
            ->setIframeWhitelist($this->config->getFilterIframeWhitelist(array()))
            ->setIntegerAttributes($this->config->getFilterIntegerAttributes(array()))
            ->setAttributeOverrides($this->config->getFilterAttributeOverrides(array()))
            ->setRequiredAttributes($this->config->getFilterRequiredAttributes(array()))
            ->setMediaBlacklist($this->config->getFilterMediaBlacklist(array()))
            ->setMediaAttributes($this->config->getFilterMediaAttributes(array()))
            ->setSchemeWhitelist($this->config->getFilterSchemeWhitelist(array()))
            ->setWhitelistedAttributes($this->config->getFilterWhitelistedTags(array()));

        // The tag filter only needs the tag names, i.e. the keys of the whitelist
        $allowed_tags = array_keys($this->config->getFilterWhitelistedTags(array()));
        $this->tag->setWhitelistedTags($allowed_tags);

        return $this;
    }

    /**
     * Run tags/attributes filtering.
     *
     * The input is walked with an event based XML parser whose callbacks
     * (startTag, endTag, dataTag) build the output.
     *
     * @return string
     */
    public function execute()
    {
        $this->preFilter();

        $xml_parser = xml_parser_create();

        xml_set_object($xml_parser, $this);
        xml_set_element_handler($xml_parser, 'startTag', 'endTag');
        xml_set_character_data_handler($xml_parser, 'dataTag');
        xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, false);
        xml_parse($xml_parser, $this->input, true);
        xml_parser_free($xml_parser);

        $this->postFilter();

        return $this->output;
    }

    /**
     * Called before XML parsing.
     */
    public function preFilter()
    {
        $cleaned = $this->tag->removeBlacklistedTags($this->input);
        $this->input = $cleaned;
    }

    /**
     * Called after XML parsing.
*/
    public function postFilter()
    {
        $html = $this->tag->removeEmptyTags($this->output);
        $html = $this->filterRules($html);
        $html = $this->tag->removeMultipleBreakTags($html);

        $this->output = trim($html);
    }

    /**
     * Apply the website specific "filter" rules to the content.
     *
     * Each rule set whose pattern matches the full path of the website url
     * is applied as a list of regex search/replace pairs.
     *
     * @param string $content the content that should be filtered
     *
     * @return string
     */
    public function filterRules($content)
    {
        // the constructor should require a config, then this fallback can be removed
        $config = $this->config === null ? new Config() : $this->config;

        $loader = new RuleLoader($config);
        $rules = $loader->getRules($this->website);

        $url = new Url($this->website);
        $sub_url = $url->getFullPath();

        if (!isset($rules['filter'])) {
            return $content;
        }

        foreach ($rules['filter'] as $pattern => $replacements) {
            if (!preg_match($pattern, $sub_url)) {
                continue;
            }

            foreach ($replacements as $search => $replace) {
                $content = preg_replace($search, $replace, $content);
            }
        }

        return $content;
    }

    /**
     * Parse opening tag.
     *
     * The tag is written to the output only when it is allowed and still has
     * its required attributes after filtering. The outcome is pushed on a
     * stack that endTag() reads back.
     *
     * @param resource $parser     XML parser
     * @param string   $tag        Tag name
     * @param array    $attributes Tag attributes
     */
    public function startTag($parser, $tag, array $attributes)
    {
        $this->empty = true;

        if ($this->tag->isAllowed($tag, $attributes)) {
            $filtered = $this->attribute->filter($tag, $attributes);

            if ($this->attribute->hasRequiredAttributes($tag, $filtered)) {
                $completed = $this->attribute->addAttributes($tag, $filtered);
                $html_attributes = $this->attribute->toHtml($completed);

                $this->output .= $this->tag->openHtmlTag($tag, $html_attributes);
                $this->empty = false;
            }
        }

        $this->empty_tags[] = $this->empty;
    }

    /**
     * Parse closing tag.
     *
     * @param resource $parser XML parser
     * @param string   $tag    Tag name
     */
    public function endTag($parser, $tag)
    {
        // State recorded by startTag() for the matching opening tag
        $was_skipped = array_pop($this->empty_tags);

        if ($was_skipped) {
            return;
        }

        if ($this->tag->isAllowedTag($tag)) {
            $this->output .= $this->tag->closeHtmlTag($tag);
        }
    }

    /**
     * Parse tag content.
* * @param resource $parser XML parser * @param string $content Tag content */ public function dataTag($parser, $content) { // Replace   with normal space $content = str_replace("\xc2\xa0", ' ', $content); $this->output .= Filter::escape($content); } } picoFeed-0.1.18/lib/PicoFeed/Filter/Tag.php000066400000000000000000000110521265625172200202230ustar00rootroot00000000000000config = $config; } /** * Check if the tag is allowed and is not a pixel tracker. * * @param string $tag Tag name * @param array $attributes Attributes dictionary * * @return bool */ public function isAllowed($tag, array $attributes) { return $this->isAllowedTag($tag) && !$this->isPixelTracker($tag, $attributes); } /** * Return the HTML opening tag. * * @param string $tag Tag name * @param string $attributes Attributes converted in html * * @return string */ public function openHtmlTag($tag, $attributes = '') { return '<'.$tag.(empty($attributes) ? '' : ' '.$attributes).($this->isSelfClosingTag($tag) ? '/>' : '>'); } /** * Return the HTML closing tag. * * @param string $tag Tag name * * @return string */ public function closeHtmlTag($tag) { return $this->isSelfClosingTag($tag) ? '' : ''; } /** * Return true is the tag is self-closing. * * @param string $tag Tag name * * @return bool */ public function isSelfClosingTag($tag) { return $tag === 'br' || $tag === 'img'; } /** * Check if a tag is on the whitelist. * * @param string $tag Tag name * * @return bool */ public function isAllowedTag($tag) { return in_array($tag, array_merge( $this->tag_whitelist, array_keys($this->config->getFilterWhitelistedTags(array())) )); } /** * Detect if an image tag is a pixel tracker. * * @param string $tag Tag name * @param array $attributes Tag attributes * * @return bool */ public function isPixelTracker($tag, array $attributes) { return $tag === 'img' && isset($attributes['height']) && isset($attributes['width']) && $attributes['height'] == 1 && $attributes['width'] == 1; } /** * Remove script tags. 
* * @param string $data Input data * * @return string */ public function removeBlacklistedTags($data) { $dom = XmlParser::getDomDocument($data); if ($dom === false) { return ''; } $xpath = new DOMXpath($dom); $nodes = $xpath->query(implode(' | ', $this->tag_blacklist)); foreach ($nodes as $node) { $node->parentNode->removeChild($node); } return $dom->saveXML(); } /** * Remove empty tags. * * @param string $data Input data * * @return string */ public function removeEmptyTags($data) { return preg_replace('/<([^<\/>]*)>([\s]*?|(?R))<\/\1>/imsU', '', $data); } /** * Replace

by only one. * * @param string $data Input data * * @return string */ public function removeMultipleBreakTags($data) { return preg_replace("/(\s*)+/", '
', $data); } /** * Set whitelisted tags adn attributes for each tag. * * @param array $values List of tags: ['video' => ['src', 'cover'], 'img' => ['src']] * * @return Tag */ public function setWhitelistedTags(array $values) { $this->tag_whitelist = $values ?: $this->tag_whitelist; return $this; } } picoFeed-0.1.18/lib/PicoFeed/Logging/000077500000000000000000000000001265625172200171415ustar00rootroot00000000000000picoFeed-0.1.18/lib/PicoFeed/Logging/Logger.php000066400000000000000000000035661265625172200211030ustar00rootroot00000000000000format('Y-m-d H:i:s').'] '.$message; } } /** * Get all logged messages. * * @static * * @return array */ public static function getMessages() { return self::$messages; } /** * Remove all logged messages. * * @static */ public static function deleteMessages() { self::$messages = array(); } /** * Set a different timezone. * * @static * * @see http://php.net/manual/en/timezones.php * * @param string $timezone Timezone */ public static function setTimeZone($timezone) { self::$timezone = $timezone ?: self::$timezone; } /** * Get all messages serialized into a string. * * @static * * @return string */ public static function toString() { return implode(PHP_EOL, self::$messages).PHP_EOL; } } picoFeed-0.1.18/lib/PicoFeed/Parser/000077500000000000000000000000001265625172200170075ustar00rootroot00000000000000picoFeed-0.1.18/lib/PicoFeed/Parser/Atom.php000066400000000000000000000261171265625172200204270ustar00rootroot00000000000000 'http://www.w3.org/2005/Atom', ); /** * Get the path to the items XML tree. * * @param SimpleXMLElement $xml Feed xml * * @return SimpleXMLElement */ public function getItemsTree(SimpleXMLElement $xml) { return XmlParser::getXPathResult($xml, 'atom:entry', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'entry'); } /** * Find the feed url. 
*
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedUrl(SimpleXMLElement $xml, Feed $feed)
    {
        $self_url = $this->getUrl($xml, 'self');
        $feed->feed_url = $self_url;
    }

    /**
     * Find the site url (the "alternate" link, with fallback on a link without relationship).
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findSiteUrl(SimpleXMLElement $xml, Feed $feed)
    {
        $alternate_url = $this->getUrl($xml, 'alternate', true);
        $feed->site_url = $alternate_url;
    }

    /**
     * Find the feed description.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedDescription(SimpleXMLElement $xml, Feed $feed)
    {
        $nodes = XmlParser::getXPathResult($xml, 'atom:subtitle', $this->namespaces);

        if (!$nodes) {
            // Retry without the Atom namespace
            $nodes = XmlParser::getXPathResult($xml, 'subtitle');
        }

        $feed->description = (string) current($nodes);
    }

    /**
     * Find the feed logo url.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedLogo(SimpleXMLElement $xml, Feed $feed)
    {
        $nodes = XmlParser::getXPathResult($xml, 'atom:logo', $this->namespaces);

        if (!$nodes) {
            // Retry without the Atom namespace
            $nodes = XmlParser::getXPathResult($xml, 'logo');
        }

        $feed->logo = (string) current($nodes);
    }

    /**
     * Find the feed icon.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedIcon(SimpleXMLElement $xml, Feed $feed)
    {
        $nodes = XmlParser::getXPathResult($xml, 'atom:icon', $this->namespaces);

        if (!$nodes) {
            // Retry without the Atom namespace
            $nodes = XmlParser::getXPathResult($xml, 'icon');
        }

        $feed->icon = (string) current($nodes);
    }

    /**
     * Find the feed title (the site url is used when the title is empty).
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedTitle(SimpleXMLElement $xml, Feed $feed)
    {
        $nodes = XmlParser::getXPathResult($xml, 'atom:title', $this->namespaces);

        if (!$nodes) {
            // Retry without the Atom namespace
            $nodes = XmlParser::getXPathResult($xml, 'title');
        }

        $title = Filter::stripWhiteSpace((string) current($nodes));

        if (!$title) {
            $title = $feed->getSiteUrl();
        }

        $feed->title = $title;
    }

    /**
     * Find the feed language.
*
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed)
    {
        $nodes = XmlParser::getXPathResult($xml, '*[not(self::atom:entry)]/@xml:lang', $this->namespaces);

        if (!$nodes) {
            // Fallback on the xml:lang attribute of the element itself
            $nodes = XmlParser::getXPathResult($xml, '@xml:lang');
        }

        $feed->language = (string) current($nodes);
    }

    /**
     * Find the feed id.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedId(SimpleXMLElement $xml, Feed $feed)
    {
        $nodes = XmlParser::getXPathResult($xml, 'atom:id', $this->namespaces);

        if (!$nodes) {
            // Retry without the Atom namespace
            $nodes = XmlParser::getXPathResult($xml, 'id');
        }

        $feed->id = (string) current($nodes);
    }

    /**
     * Find the feed date.
     *
     * @param SimpleXMLElement      $xml  Feed xml
     * @param \PicoFeed\Parser\Feed $feed Feed object
     */
    public function findFeedDate(SimpleXMLElement $xml, Feed $feed)
    {
        $nodes = XmlParser::getXPathResult($xml, 'atom:updated', $this->namespaces);

        if (!$nodes) {
            // Retry without the Atom namespace
            $nodes = XmlParser::getXPathResult($xml, 'updated');
        }

        $feed->date = $this->date->getDateTime((string) current($nodes));
    }

    /**
     * Find the item date.
     *
     * The most recent of "published" and "updated" is used; when only one of
     * them is available that one is used, and when both are missing the item
     * gets the feed date.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param Item                  $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
        $published_nodes = XmlParser::getXPathResult($entry, 'atom:published', $this->namespaces);

        if (!$published_nodes) {
            $published_nodes = XmlParser::getXPathResult($entry, 'published');
        }

        $updated_nodes = XmlParser::getXPathResult($entry, 'atom:updated', $this->namespaces);

        if (!$updated_nodes) {
            $updated_nodes = XmlParser::getXPathResult($entry, 'updated');
        }

        $published_at = null;
        $updated_at = null;

        if (!empty($published_nodes)) {
            $published_at = $this->date->getDateTime((string) current($published_nodes));
        }

        if (!empty($updated_nodes)) {
            $updated_at = $this->date->getDateTime((string) current($updated_nodes));
        }

        if ($published_at !== null && $updated_at !== null) {
            // Both dates are known: keep the most recent one
            $item->date = max($published_at, $updated_at);
        } elseif ($published_at === null && $updated_at === null) {
            // No date at all for the item: use the feed date
            $item->date = $feed->getDate();
        } else {
            $item->date = $updated_at !== null ? $updated_at : $published_at;
        }
    }

    /**
     * Find the item title (the item url is used when the title is empty).
     *
     * @param SimpleXMLElement $entry Feed item
     * @param Item             $item  Item object
     */
    public function findItemTitle(SimpleXMLElement $entry, Item $item)
    {
        $nodes = XmlParser::getXPathResult($entry, 'atom:title', $this->namespaces);

        if (!$nodes) {
            // Retry without the Atom namespace
            $nodes = XmlParser::getXPathResult($entry, 'title');
        }

        $title = Filter::stripWhiteSpace((string) current($nodes));

        if (!$title) {
            $title = $item->url;
        }

        $item->title = $title;
    }

    /**
     * Find the item author.
     *
     * The author of the entry is preferred, the author of the feed is the fallback.
     *
     * @param SimpleXMLElement      $xml   Feed
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     */
    public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item)
    {
        // Lookups by priority: [node to search, query, use the registered namespaces]
        $lookups = array(
            array($entry, 'atom:author/atom:name', true),
            array($entry, 'author/name', false),
            array($xml, 'atom:author/atom:name', true),
            array($xml, 'author/name', false),
        );

        $author = array();

        foreach ($lookups as $lookup) {
            list($node, $query, $namespaced) = $lookup;

            $author = $namespaced
                ? XmlParser::getXPathResult($node, $query, $this->namespaces)
                : XmlParser::getXPathResult($node, $query);

            if ($author) {
                break;
            }
        }

        $item->author = (string) current($author);
    }

    /**
     * Find the item content.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     */
    public function findItemContent(SimpleXMLElement $entry, Item $item)
    {
        $content = $this->getContent($entry);
        $item->content = $content;
    }

    /**
     * Find the item URL (the "alternate" link, with fallback on a link without relationship).
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     */
    public function findItemUrl(SimpleXMLElement $entry, Item $item)
    {
        $alternate_url = $this->getUrl($entry, 'alternate', true);
        $item->url = $alternate_url;
    }

    /**
     * Generate the item id.
*
     * The id is derived from the entry id when there is one, otherwise from
     * the item title, url and content.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
        $nodes = XmlParser::getXPathResult($entry, 'atom:id', $this->namespaces);

        if (!$nodes) {
            // Retry without the Atom namespace
            $nodes = XmlParser::getXPathResult($entry, 'id');
        }

        if (empty($nodes)) {
            $item->id = $this->generateId(
                $item->getTitle(), $item->getUrl(), $item->getContent()
            );

            return;
        }

        $item->id = $this->generateId((string) current($nodes));
    }

    /**
     * Find the item enclosure.
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
        $link = $this->findLink($entry, 'enclosure');

        if (!$link) {
            return;
        }

        $href = (string) $link['href'];

        $item->enclosure_url = Url::resolve($href, $feed->getSiteUrl());
        $item->enclosure_type = (string) $link['type'];
    }

    /**
     * Find the item language (the feed language is used when the entry has none).
     *
     * @param SimpleXMLElement      $entry Feed item
     * @param \PicoFeed\Parser\Item $item  Item object
     * @param \PicoFeed\Parser\Feed $feed  Feed object
     */
    public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed)
    {
        $nodes = XmlParser::getXPathResult($entry, './/@xml:lang');
        $language = (string) current($nodes);

        if (!$language) {
            $language = $feed->language;
        }

        $item->language = $language;
    }

    /**
     * Get the URL from a link tag.
     *
     * @param SimpleXMLElement $xml      XML tag
     * @param string           $rel      Link relationship: alternate, enclosure, related, self, via
     * @param bool             $fallback Use a link without relationship when no link matches $rel
     *
     * @return string Value of the href attribute, or an empty string when no link is found
     */
    private function getUrl(SimpleXMLElement $xml, $rel, $fallback = false)
    {
        $link = $this->findLink($xml, $rel);

        if (!$link && $fallback) {
            $link = $this->findLink($xml, '');
        }

        if (!$link) {
            return '';
        }

        return (string) $link['href'];
    }

    /**
     * Get a link tag that match a relationship.
* * @param SimpleXMLElement $xml XML tag * @param string $rel Link relationship: alternate, enclosure, related, self, via * * @return SimpleXMLElement|null */ private function findLink(SimpleXMLElement $xml, $rel) { $links = XmlParser::getXPathResult($xml, 'atom:link', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'link'); foreach ($links as $link) { if ($rel === (string) $link['rel']) { return $link; } } return; } /** * Get the entry content. * * @param SimpleXMLElement $entry XML Entry * * @return string */ private function getContent(SimpleXMLElement $entry) { $content = current( XmlParser::getXPathResult($entry, 'atom:content', $this->namespaces) ?: XmlParser::getXPathResult($entry, 'content') ); if (!empty($content) && count($content->children())) { $xml_string = ''; foreach ($content->children() as $child) { $xml_string .= $child->asXML(); } return $xml_string; } elseif (trim((string) $content) !== '') { return (string) $content; } $summary = XmlParser::getXPathResult($entry, 'atom:summary', $this->namespaces) ?: XmlParser::getXPathResult($entry, 'summary'); return (string) current($summary); } } picoFeed-0.1.18/lib/PicoFeed/Parser/DateParser.php000066400000000000000000000047521265625172200215620ustar00rootroot00000000000000 length ]. * * @var array */ public $formats = array( DATE_ATOM => null, DATE_RSS => null, DATE_COOKIE => null, DATE_ISO8601 => null, DATE_RFC822 => null, DATE_RFC850 => null, DATE_RFC1036 => null, DATE_RFC1123 => null, DATE_RFC2822 => null, DATE_RFC3339 => null, 'D, d M Y H:i:s' => 25, 'D, d M Y h:i:s' => 25, 'D M d Y H:i:s' => 24, 'j M Y H:i:s' => 20, 'Y-m-d H:i:s' => 19, 'Y-m-d\TH:i:s' => 19, 'd/m/Y H:i:s' => 19, 'D, d M Y' => 16, 'Y-m-d' => 10, 'd-m-Y' => 10, 'm-d-Y' => 10, 'd.m.Y' => 10, 'm.d.Y' => 10, 'd/m/Y' => 10, 'm/d/Y' => 10, ); /** * Try to parse all date format for broken feeds. 
*
     * Every known format is tried in order; the current date and time is
     * returned when none of them matches.
     *
     * @param string $value Original date format
     *
     * @return DateTime
     */
    public function getDateTime($value)
    {
        $value = trim($value);

        foreach ($this->formats as $format => $length) {
            // Formats declared with a length are matched against the beginning of the value only
            $candidate = $length === null ? $value : (string) substr($value, 0, $length);
            $date = $this->getValidDate($format, $candidate);

            if ($date !== false) {
                return $date;
            }
        }

        return $this->getCurrentDateTime();
    }

    /**
     * Get a valid date from a given format.
     *
     * A date is valid when it was parsed without any error or warning.
     *
     * @param string $format Date format
     * @param string $value  Original date value
     *
     * @return DateTime|bool DateTime instance, or false when the value doesn't match the format
     */
    public function getValidDate($format, $value)
    {
        $date = DateTime::createFromFormat($format, $value, new DateTimeZone($this->timezone));

        if ($date === false) {
            return false;
        }

        $errors = DateTime::getLastErrors();

        // Since PHP 8.2 getLastErrors() returns false (not an array of zero counters)
        // when parsing produced neither warnings nor errors.
        if ($errors === false) {
            return $date;
        }

        if ($errors['error_count'] === 0 && $errors['warning_count'] === 0) {
            return $date;
        }

        return false;
    }

    /**
     * Get the current datetime.
     *
     * @return DateTime
     */
    public function getCurrentDateTime()
    {
        return new DateTime('now', new DateTimeZone($this->timezone));
    }
}
picoFeed-0.1.18/lib/PicoFeed/Parser/Feed.php000066400000000000000000000056511265625172200203720ustar00rootroot00000000000000$property.PHP_EOL; } $output .= 'Feed::date = '.$this->date->format(DATE_RFC822).PHP_EOL; $output .= 'Feed::isRTL() = '.($this->isRTL() ? 'true' : 'false').PHP_EOL; $output .= 'Feed::items = '.count($this->items).' items'.PHP_EOL; foreach ($this->items as $item) { $output .= '----'.PHP_EOL; $output .= $item; } return $output; } /** * Get title. */ public function getTitle() { return $this->title; } /** * Get description. */ public function getDescription() { return $this->description; } /** * Get the logo url. */ public function getLogo() { return $this->logo; } /** * Get the icon url. */ public function getIcon() { return $this->icon; } /** * Get feed url. */ public function getFeedUrl() { return $this->feed_url; } /** * Get site url. */ public function getSiteUrl() { return $this->site_url; } /** * Get date. 
*/ public function getDate() { return $this->date; } /** * Get language. */ public function getLanguage() { return $this->language; } /** * Get id. */ public function getId() { return $this->id; } /** * Get feed items. */ public function getItems() { return $this->items; } /** * Return true if the feed is "Right to Left". * * @return bool */ public function isRTL() { return Parser::isLanguageRTL($this->language); } } picoFeed-0.1.18/lib/PicoFeed/Parser/Item.php000066400000000000000000000076771265625172200204370ustar00rootroot00000000000000xml, $query, $this->namespaces); if ($elements === false) { // xPath error return false; } return array_map(function ($element) { return (string) $element;}, $elements); } /** * Return item information. */ public function __toString() { $output = ''; foreach (array('id', 'title', 'url', 'language', 'author', 'enclosure_url', 'enclosure_type') as $property) { $output .= 'Item::'.$property.' = '.$this->$property.PHP_EOL; } $output .= 'Item::date = '.$this->date->format(DATE_RFC822).PHP_EOL; $output .= 'Item::isRTL() = '.($this->isRTL() ? 'true' : 'false').PHP_EOL; $output .= 'Item::content = '.strlen($this->content).' bytes'.PHP_EOL; return $output; } /** * Get title. */ public function getTitle() { return $this->title; } /** * Get url. */ public function getUrl() { return $this->url; } /** * Get id. */ public function getId() { return $this->id; } /** * Get date. */ public function getDate() { return $this->date; } /** * Get content. */ public function getContent() { return $this->content; } /** * Get enclosure url. */ public function getEnclosureUrl() { return $this->enclosure_url; } /** * Get enclosure type. */ public function getEnclosureType() { return $this->enclosure_type; } /** * Get language. */ public function getLanguage() { return $this->language; } /** * Get author. */ public function getAuthor() { return $this->author; } /** * Return true if the item is "Right to Left". 
* * @return bool */ public function isRTL() { return Parser::isLanguageRTL($this->language); } } picoFeed-0.1.18/lib/PicoFeed/Parser/MalformedXmlException.php000066400000000000000000000002471265625172200237710ustar00rootroot00000000000000date = new DateParser(); $this->fallback_url = $fallback_url; $xml_encoding = XmlParser::getEncodingFromXmlTag($content); // Strip XML tag to avoid multiple encoding/decoding in the next XML processing $this->content = Filter::stripXmlTag($content); // Encode everything in UTF-8 Logger::setMessage(get_called_class().': HTTP Encoding "'.$http_encoding.'" ; XML Encoding "'.$xml_encoding.'"'); $this->content = Encoding::convert($this->content, $xml_encoding ?: $http_encoding); } /** * Parse the document. * * @return \PicoFeed\Parser\Feed */ public function execute() { Logger::setMessage(get_called_class().': begin parsing'); $xml = XmlParser::getSimpleXml($this->content); if ($xml === false) { Logger::setMessage(get_called_class().': Applying XML workarounds'); $this->content = Filter::normalizeData($this->content); $xml = XmlParser::getSimpleXml($this->content); if ($xml === false) { Logger::setMessage(get_called_class().': XML parsing error'); Logger::setMessage(XmlParser::getErrors()); throw new MalformedXmlException('XML parsing error'); } } $this->used_namespaces = $xml->getNamespaces(true); $xml = $this->registerSupportedNamespaces($xml); $feed = new Feed(); $this->findFeedUrl($xml, $feed); $this->checkFeedUrl($feed); $this->findSiteUrl($xml, $feed); $this->checkSiteUrl($feed); $this->findFeedTitle($xml, $feed); $this->findFeedDescription($xml, $feed); $this->findFeedLanguage($xml, $feed); $this->findFeedId($xml, $feed); $this->findFeedDate($xml, $feed); $this->findFeedLogo($xml, $feed); $this->findFeedIcon($xml, $feed); foreach ($this->getItemsTree($xml) as $entry) { $entry = $this->registerSupportedNamespaces($entry); $item = new Item(); $item->xml = $entry; $item->namespaces = $this->used_namespaces; 
$this->findItemAuthor($xml, $entry, $item); $this->findItemUrl($entry, $item); $this->checkItemUrl($feed, $item); $this->findItemTitle($entry, $item); $this->findItemContent($entry, $item); // Id generation can use the item url/title/content (order is important) $this->findItemId($entry, $item, $feed); $this->findItemDate($entry, $item, $feed); $this->findItemEnclosure($entry, $item, $feed); $this->findItemLanguage($entry, $item, $feed); // Order is important (avoid double filtering) $this->filterItemContent($feed, $item); $this->scrapWebsite($item); $feed->items[] = $item; } Logger::setMessage(get_called_class().PHP_EOL.$feed); return $feed; } /** * Check if the feed url is correct. * * @param Feed $feed Feed object */ public function checkFeedUrl(Feed $feed) { if ($feed->getFeedUrl() === '') { $feed->feed_url = $this->fallback_url; } else { $feed->feed_url = Url::resolve($feed->getFeedUrl(), $this->fallback_url); } } /** * Check if the site url is correct. * * @param Feed $feed Feed object */ public function checkSiteUrl(Feed $feed) { if ($feed->getSiteUrl() === '') { $feed->site_url = Url::base($feed->getFeedUrl()); } else { $feed->site_url = Url::resolve($feed->getSiteUrl(), $this->fallback_url); } } /** * Check if the item url is correct. * * @param Feed $feed Feed object * @param Item $item Item object */ public function checkItemUrl(Feed $feed, Item $item) { $item->url = Url::resolve($item->getUrl(), $feed->getSiteUrl()); } /** * Fetch item content with the content grabber. * * @param Item $item Item object */ public function scrapWebsite(Item $item) { if ($this->enable_grabber && !in_array($item->getUrl(), $this->grabber_ignore_urls)) { $grabber = new Scraper($this->config); $grabber->setUrl($item->getUrl()); if ($this->grabber_needs_rule_file) { $grabber->disableCandidateParser(); } $grabber->execute(); if ($grabber->hasRelevantContent()) { $item->content = $grabber->getFilteredContent(); } } } /** * Filter HTML for entry content. 
* * @param Feed $feed Feed object * @param Item $item Item object */ public function filterItemContent(Feed $feed, Item $item) { if ($this->isFilteringEnabled()) { $filter = Filter::html($item->getContent(), $feed->getSiteUrl()); $filter->setConfig($this->config); $item->content = $filter->execute(); } else { Logger::setMessage(get_called_class().': Content filtering disabled'); } } /** * Generate a unique id for an entry (hash all arguments). * * @return string */ public function generateId() { return hash($this->hash_algo, implode(func_get_args())); } /** * Return true if the given language is "Right to Left". * * @static * * @param string $language Language: fr-FR, en-US * * @return bool */ public static function isLanguageRTL($language) { $language = strtolower($language); $rtl_languages = array( 'ar', // Arabic (ar-**) 'fa', // Farsi (fa-**) 'ur', // Urdu (ur-**) 'ps', // Pashtu (ps-**) 'syr', // Syriac (syr-**) 'dv', // Divehi (dv-**) 'he', // Hebrew (he-**) 'yi', // Yiddish (yi-**) ); foreach ($rtl_languages as $prefix) { if (strpos($language, $prefix) === 0) { return true; } } return false; } /** * Set Hash algorithm used for id generation. * * @param string $algo Algorithm name * * @return \PicoFeed\Parser\Parser */ public function setHashAlgo($algo) { $this->hash_algo = $algo ?: $this->hash_algo; return $this; } /** * Set a different timezone. * * @see http://php.net/manual/en/timezones.php * * @param string $timezone Timezone * * @return \PicoFeed\Parser\Parser */ public function setTimezone($timezone) { if ($timezone) { $this->date->timezone = $timezone; } return $this; } /** * Set config object. * * @param \PicoFeed\Config\Config $config Config instance * * @return \PicoFeed\Parser\Parser */ public function setConfig($config) { $this->config = $config; return $this; } /** * Enable the content grabber. 
* * @return \PicoFeed\Parser\Parser */ public function disableContentFiltering() { $this->enable_filter = false; } /** * Return true if the content filtering is enabled. * * @return bool */ public function isFilteringEnabled() { if ($this->config === null) { return $this->enable_filter; } return $this->config->getContentFiltering($this->enable_filter); } /** * Enable the content grabber. * * @param bool $needs_rule_file true if only pages with rule files should be * scraped * * @return \PicoFeed\Parser\Parser */ public function enableContentGrabber($needs_rule_file = false) { $this->enable_grabber = true; $this->grabber_needs_rule_file = $needs_rule_file; } /** * Set ignored URLs for the content grabber. * * @param array $urls URLs * * @return \PicoFeed\Parser\Parser */ public function setGrabberIgnoreUrls(array $urls) { $this->grabber_ignore_urls = $urls; } /** * Register all supported namespaces to be used within an xpath query. * * @param SimpleXMLElement $xml Feed xml * * @return SimpleXMLElement */ public function registerSupportedNamespaces(SimpleXMLElement $xml) { foreach ($this->namespaces as $prefix => $ns) { $xml->registerXPathNamespace($prefix, $ns); } return $xml; } /** * Find the feed url. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findFeedUrl(SimpleXMLElement $xml, Feed $feed); /** * Find the site url. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findSiteUrl(SimpleXMLElement $xml, Feed $feed); /** * Find the feed title. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findFeedTitle(SimpleXMLElement $xml, Feed $feed); /** * Find the feed description. 
* * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findFeedDescription(SimpleXMLElement $xml, Feed $feed); /** * Find the feed language. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed); /** * Find the feed id. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findFeedId(SimpleXMLElement $xml, Feed $feed); /** * Find the feed date. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findFeedDate(SimpleXMLElement $xml, Feed $feed); /** * Find the feed logo url. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findFeedLogo(SimpleXMLElement $xml, Feed $feed); /** * Find the feed icon. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findFeedIcon(SimpleXMLElement $xml, Feed $feed); /** * Get the path to the items XML tree. * * @param SimpleXMLElement $xml Feed xml * * @return SimpleXMLElement */ abstract public function getItemsTree(SimpleXMLElement $xml); /** * Find the item author. * * @param SimpleXMLElement $xml Feed * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ abstract public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item); /** * Find the item URL. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ abstract public function findItemUrl(SimpleXMLElement $entry, Item $item); /** * Find the item title. 
* * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ abstract public function findItemTitle(SimpleXMLElement $entry, Item $item); /** * Genereate the item id. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed); /** * Find the item date. * * @param SimpleXMLElement $entry Feed item * @param Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed); /** * Find the item content. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ abstract public function findItemContent(SimpleXMLElement $entry, Item $item); /** * Find the item enclosure. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed); /** * Find the item language. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ abstract public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed); } picoFeed-0.1.18/lib/PicoFeed/Parser/ParserException.php000066400000000000000000000003071265625172200226330ustar00rootroot00000000000000 'http://purl.org/rss/1.0/', 'dc' => 'http://purl.org/dc/elements/1.1/', 'content' => 'http://purl.org/rss/1.0/modules/content/', 'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0', ); /** * Get the path to the items XML tree. 
* * @param SimpleXMLElement $xml Feed xml * * @return SimpleXMLElement */ public function getItemsTree(SimpleXMLElement $xml) { return XmlParser::getXPathResult($xml, 'rss:item', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'item'); } /** * Find the feed url. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedUrl(SimpleXMLElement $xml, Feed $feed) { $feed->feed_url = ''; } /** * Find the site url. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findSiteUrl(SimpleXMLElement $xml, Feed $feed) { $site_url = XmlParser::getXPathResult($xml, 'rss:channel/rss:link', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'channel/link'); $feed->site_url = (string) current($site_url); } /** * Find the feed description. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedDescription(SimpleXMLElement $xml, Feed $feed) { $description = XmlParser::getXPathResult($xml, 'rss:channel/rss:description', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'channel/description'); $feed->description = (string) current($description); } /** * Find the feed logo url. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedLogo(SimpleXMLElement $xml, Feed $feed) { $logo = XmlParser::getXPathResult($xml, 'rss:image/rss:url', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'image/url'); $feed->logo = (string) current($logo); } /** * Find the feed icon. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedIcon(SimpleXMLElement $xml, Feed $feed) { $feed->icon = ''; } /** * Find the feed title. 
* * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedTitle(SimpleXMLElement $xml, Feed $feed) { $title = XmlParser::getXPathResult($xml, 'rss:channel/rss:title', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'channel/title'); $feed->title = Filter::stripWhiteSpace((string) current($title)) ?: $feed->getSiteUrl(); } /** * Find the feed language. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed) { $language = XmlParser::getXPathResult($xml, 'rss:channel/dc:language', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'channel/dc:language', $this->namespaces); $feed->language = (string) current($language); } /** * Find the feed id. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedId(SimpleXMLElement $xml, Feed $feed) { $feed->id = $feed->getFeedUrl() ?: $feed->getSiteUrl(); } /** * Find the feed date. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedDate(SimpleXMLElement $xml, Feed $feed) { $date = XmlParser::getXPathResult($xml, 'rss:channel/dc:date', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'channel/dc:date', $this->namespaces); $feed->date = $this->date->getDateTime((string) current($date)); } /** * Find the item date. * * @param SimpleXMLElement $entry Feed item * @param Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed) { $date = XmlParser::getXPathResult($entry, 'dc:date', $this->namespaces); $item->date = empty($date) ? $feed->getDate() : $this->date->getDateTime((string) current($date)); } /** * Find the item title. 
* * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemTitle(SimpleXMLElement $entry, Item $item) { $title = XmlParser::getXPathResult($entry, 'rss:title', $this->namespaces) ?: XmlParser::getXPathResult($entry, 'title'); $item->title = Filter::stripWhiteSpace((string) current($title)) ?: $item->url; } /** * Find the item author. * * @param SimpleXMLElement $xml Feed * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item) { $author = XmlParser::getXPathResult($entry, 'dc:creator', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'rss:channel/dc:creator', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'channel/dc:creator', $this->namespaces); $item->author = (string) current($author); } /** * Find the item content. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemContent(SimpleXMLElement $entry, Item $item) { $content = XmlParser::getXPathResult($entry, 'content:encoded', $this->namespaces); if (trim((string) current($content)) === '') { $content = XmlParser::getXPathResult($entry, 'rss:description', $this->namespaces) ?: XmlParser::getXPathResult($entry, 'description'); } $item->content = (string) current($content); } /** * Find the item URL. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemUrl(SimpleXMLElement $entry, Item $item) { $link = XmlParser::getXPathResult($entry, 'feedburner:origLink', $this->namespaces) ?: XmlParser::getXPathResult($entry, 'rss:link', $this->namespaces) ?: XmlParser::getXPathResult($entry, 'link'); $item->url = trim((string) current($link)); } /** * Genereate the item id. 
* * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed) { $item->id = $this->generateId( $item->getTitle(), $item->getUrl(), $item->getContent() ); } /** * Find the item enclosure. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed) { } /** * Find the item language. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed) { $language = XmlParser::getXPathResult($entry, 'dc:language', $this->namespaces); $item->language = (string) current($language) ?: $feed->language; } } picoFeed-0.1.18/lib/PicoFeed/Parser/Rss20.php000066400000000000000000000222621265625172200204350ustar00rootroot00000000000000 'http://purl.org/dc/elements/1.1/', 'content' => 'http://purl.org/rss/1.0/modules/content/', 'feedburner' => 'http://rssnamespace.org/feedburner/ext/1.0', 'atom' => 'http://www.w3.org/2005/Atom', ); /** * Get the path to the items XML tree. * * @param SimpleXMLElement $xml Feed xml * * @return SimpleXMLElement */ public function getItemsTree(SimpleXMLElement $xml) { return XmlParser::getXPathResult($xml, 'channel/item'); } /** * Find the feed url. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedUrl(SimpleXMLElement $xml, Feed $feed) { $feed->feed_url = ''; } /** * Find the site url. 
* * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findSiteUrl(SimpleXMLElement $xml, Feed $feed) { $site_url = XmlParser::getXPathResult($xml, 'channel/link'); $feed->site_url = (string) current($site_url); } /** * Find the feed description. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedDescription(SimpleXMLElement $xml, Feed $feed) { $description = XmlParser::getXPathResult($xml, 'channel/description'); $feed->description = (string) current($description); } /** * Find the feed logo url. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedLogo(SimpleXMLElement $xml, Feed $feed) { $logo = XmlParser::getXPathResult($xml, 'channel/image/url'); $feed->logo = (string) current($logo); } /** * Find the feed icon. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedIcon(SimpleXMLElement $xml, Feed $feed) { $feed->icon = ''; } /** * Find the feed title. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedTitle(SimpleXMLElement $xml, Feed $feed) { $title = XmlParser::getXPathResult($xml, 'channel/title'); $feed->title = Filter::stripWhiteSpace((string) current($title)) ?: $feed->getSiteUrl(); } /** * Find the feed language. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedLanguage(SimpleXMLElement $xml, Feed $feed) { $language = XmlParser::getXPathResult($xml, 'channel/language'); $feed->language = (string) current($language); } /** * Find the feed id. 
* * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedId(SimpleXMLElement $xml, Feed $feed) { $feed->id = $feed->getFeedUrl() ?: $feed->getSiteUrl(); } /** * Find the feed date. * * @param SimpleXMLElement $xml Feed xml * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findFeedDate(SimpleXMLElement $xml, Feed $feed) { $publish_date = XmlParser::getXPathResult($xml, 'channel/pubDate'); $update_date = XmlParser::getXPathResult($xml, 'channel/lastBuildDate'); $published = !empty($publish_date) ? $this->date->getDateTime((string) current($publish_date)) : null; $updated = !empty($update_date) ? $this->date->getDateTime((string) current($update_date)) : null; if ($published === null && $updated === null) { $feed->date = $this->date->getCurrentDateTime(); // We use the current date if there is no date for the feed } elseif ($published !== null && $updated !== null) { $feed->date = max($published, $updated); // We use the most recent date between published and updated } else { $feed->date = $updated ?: $published; } } /** * Find the item date. * * @param SimpleXMLElement $entry Feed item * @param Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findItemDate(SimpleXMLElement $entry, Item $item, Feed $feed) { $date = XmlParser::getXPathResult($entry, 'pubDate'); $item->date = empty($date) ? $feed->getDate() : $this->date->getDateTime((string) current($date)); } /** * Find the item title. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemTitle(SimpleXMLElement $entry, Item $item) { $title = XmlParser::getXPathResult($entry, 'title'); $item->title = Filter::stripWhiteSpace((string) current($title)) ?: $item->url; } /** * Find the item author. 
* * @param SimpleXMLElement $xml Feed * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemAuthor(SimpleXMLElement $xml, SimpleXMLElement $entry, Item $item) { $author = XmlParser::getXPathResult($entry, 'dc:creator', $this->namespaces) ?: XmlParser::getXPathResult($entry, 'author') ?: XmlParser::getXPathResult($xml, 'channel/dc:creator', $this->namespaces) ?: XmlParser::getXPathResult($xml, 'channel/managingEditor'); $item->author = (string) current($author); } /** * Find the item content. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemContent(SimpleXMLElement $entry, Item $item) { $content = XmlParser::getXPathResult($entry, 'content:encoded', $this->namespaces); if (trim((string) current($content)) === '') { $content = XmlParser::getXPathResult($entry, 'description'); } $item->content = (string) current($content); } /** * Find the item URL. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object */ public function findItemUrl(SimpleXMLElement $entry, Item $item) { $link = XmlParser::getXPathResult($entry, 'feedburner:origLink', $this->namespaces) ?: XmlParser::getXPathResult($entry, 'link') ?: XmlParser::getXPathResult($entry, 'atom:link/@href', $this->namespaces); if (!empty($link)) { $item->url = trim((string) current($link)); } else { $link = XmlParser::getXPathResult($entry, 'guid'); $link = trim((string) current($link)); if (filter_var($link, FILTER_VALIDATE_URL) !== false) { $item->url = $link; } } } /** * Genereate the item id. 
* * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findItemId(SimpleXMLElement $entry, Item $item, Feed $feed) { $id = (string) current(XmlParser::getXPathResult($entry, 'guid')); if ($id) { $item->id = $this->generateId($id); } else { $item->id = $this->generateId( $item->getTitle(), $item->getUrl(), $item->getContent() ); } } /** * Find the item enclosure. * * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findItemEnclosure(SimpleXMLElement $entry, Item $item, Feed $feed) { if (isset($entry->enclosure)) { $enclosure_url = XmlParser::getXPathResult($entry, 'feedburner:origEnclosureLink', $this->namespaces) ?: XmlParser::getXPathResult($entry, 'enclosure/@url'); $enclosure_type = XmlParser::getXPathResult($entry, 'enclosure/@type'); $item->enclosure_url = Url::resolve((string) current($enclosure_url), $feed->getSiteUrl()); $item->enclosure_type = (string) current($enclosure_type); } } /** * Find the item language. 
* * @param SimpleXMLElement $entry Feed item * @param \PicoFeed\Parser\Item $item Item object * @param \PicoFeed\Parser\Feed $feed Feed object */ public function findItemLanguage(SimpleXMLElement $entry, Item $item, Feed $feed) { $language = XmlParser::getXPathResult($entry, 'dc:language', $this->namespaces); $item->language = (string) current($language) ?: $feed->language; } } picoFeed-0.1.18/lib/PicoFeed/Parser/Rss91.php000066400000000000000000000001751265625172200204440ustar00rootroot00000000000000childNodes->length === 0) { return false; } return $dom; } /** * Small wrapper around ZendXml to turn their exceptions into picoFeed * exceptions * @param $input the xml to load * @param $dom pass in a dom document or use null/omit if simpleXml should * be used */ private static function scan($input, $dom=null) { try { return Security::scan($input, $dom); } catch(\ZendXml\Exception\RuntimeException $e) { throw new XmlEntityException($e->getMessage()); } } /** * Load HTML document by using a DomDocument instance or return false on failure. * * @static * * @param string $input XML content * * @return \DOMDocument */ public static function getHtmlDocument($input) { $dom = new DomDocument(); if (empty($input)) { return $dom; } libxml_use_internal_errors(true); if (version_compare(PHP_VERSION, '5.4.0', '>=')) { $dom->loadHTML($input, LIBXML_NONET); } else { $dom->loadHTML($input); } return $dom; } /** * Convert a HTML document to XML. * * @static * * @param string $html HTML document * * @return string */ public static function htmlToXml($html) { $dom = self::getHtmlDocument(''.$html); return $dom->saveXML($dom->getElementsByTagName('body')->item(0)); } /** * Get XML parser errors. 
* * @static * * @return string */ public static function getErrors() { $errors = array(); foreach (libxml_get_errors() as $error) { $errors[] = sprintf('XML error: %s (Line: %d - Column: %d - Code: %d)', $error->message, $error->line, $error->column, $error->code ); } return implode(', ', $errors); } /** * Get the encoding from a xml tag. * * @static * * @param string $data Input data * * @return string */ public static function getEncodingFromXmlTag($data) { $encoding = ''; if (strpos($data, '')); $data = str_replace("'", '"', $data); $p1 = strpos($data, 'encoding='); $p2 = strpos($data, '"', $p1 + 10); if ($p1 !== false && $p2 !== false) { $encoding = substr($data, $p1 + 10, $p2 - $p1 - 10); $encoding = strtolower($encoding); } } return $encoding; } /** * Get the charset from a meta tag. * * @static * * @param string $data Input data * * @return string */ public static function getEncodingFromMetaTag($data) { $encoding = ''; if (preg_match('/;]+)/i', $data, $match) === 1) { $encoding = strtolower($match[1]); } return $encoding; } /** * Rewrite XPath query to use namespace-uri and local-name derived from prefix. * * @param string $query XPath query * @param array $ns Prefix to namespace URI mapping * * @return string */ public static function replaceXPathPrefixWithNamespaceURI($query, array $ns) { return preg_replace_callback('/([A-Z0-9]+):([A-Z0-9]+)/iu', function ($matches) use ($ns) { // don't try to map the special prefix XML if (strtolower($matches[1]) === 'xml') { return $matches[0]; } return '*[namespace-uri()="'.$ns[$matches[1]].'" and local-name()="'.$matches[2].'"]'; }, $query); } /** * Get the result elements of a XPath query. 
* * @param \SimpleXMLElement $xml XML element * @param string $query XPath query * @param array $ns Prefix to namespace URI mapping * * @return \SimpleXMLElement */ public static function getXPathResult(SimpleXMLElement $xml, $query, array $ns = array()) { if (!empty($ns)) { $query = static::replaceXPathPrefixWithNamespaceURI($query, $ns); } return $xml->xpath($query); } } picoFeed-0.1.18/lib/PicoFeed/PicoFeedException.php000066400000000000000000000002531265625172200216210ustar00rootroot00000000000000config = $config ?: new Config(); } /** * Get the icon file content (available only after the download). * * @return string */ public function getContent() { return $this->content; } /** * Get the icon file type (available only after the download). * * @return string */ public function getType() { foreach ($this->types as $type) { if (strpos($this->content_type, $type) === 0) { return $type; } } return 'image/x-icon'; } /** * Get data URI (http://en.wikipedia.org/wiki/Data_URI_scheme). * * @return string */ public function getDataUri() { if (empty($this->content)) { return ''; } return sprintf( 'data:%s;base64,%s', $this->getType(), base64_encode($this->content) ); } /** * Download and check if a resource exists. * * @param string $url URL * * @return \PicoFeed\Client Client instance */ public function download($url) { $client = Client::getInstance(); $client->setConfig($this->config); Logger::setMessage(get_called_class().' Download => '.$url); try { $client->execute($url); } catch (ClientException $e) { Logger::setMessage(get_called_class().' Download Failed => '.$e->getMessage()); } return $client; } /** * Check if a remote file exists. * * @param string $url URL * * @return bool */ public function exists($url) { return $this->download($url)->getContent() !== ''; } /** * Get the icon link for a website. 
* * @param string $website_link URL * @param string $favicon_link optional URL * * @return string */ public function find($website_link, $favicon_link = '') { $website = new Url($website_link); if ($favicon_link !== '') { $icons = array($favicon_link); } else { $icons = $this->extract($this->download($website->getBaseUrl('/'))->getContent()); $icons[] = $website->getBaseUrl('/favicon.ico'); } foreach ($icons as $icon_link) { $icon_link = Url::resolve($icon_link, $website); $resource = $this->download($icon_link); $this->content = $resource->getContent(); $this->content_type = $resource->getContentType(); if ($this->content !== '') { return $icon_link; } elseif ($favicon_link !== '') { return $this->find($website_link); } } return ''; } /** * Extract the icon links from the HTML. * * @param string $html HTML * * @return array */ public function extract($html) { $icons = array(); if (empty($html)) { return $icons; } $dom = XmlParser::getHtmlDocument($html); $xpath = new DOMXpath($dom); $elements = $xpath->query('//link[@rel="icon" or @rel="shortcut icon" or @rel="icon shortcut"]'); for ($i = 0; $i < $elements->length; ++$i) { $icons[] = $elements->item($i)->getAttribute('href'); } return $icons; } } picoFeed-0.1.18/lib/PicoFeed/Reader/Reader.php000066400000000000000000000130771265625172200207000ustar00rootroot00000000000000 '//feed', 'Rss20' => '//rss[@version="2.0"]', 'Rss92' => '//rss[@version="0.92"]', 'Rss91' => '//rss[@version="0.91"]', 'Rss10' => '//rdf', ); /** * Config class instance. * * @var \PicoFeed\Config\Config */ private $config; /** * Constructor. * * @param \PicoFeed\Config\Config $config Config class instance */ public function __construct(Config $config = null) { $this->config = $config ?: new Config(); Logger::setTimezone($this->config->getTimezone()); } /** * Download a feed (no discovery). 
* * @param string $url Feed url * @param string $last_modified Last modified HTTP header * @param string $etag Etag HTTP header * @param string $username HTTP basic auth username * @param string $password HTTP basic auth password * * @return \PicoFeed\Client\Client */ public function download($url, $last_modified = '', $etag = '', $username = '', $password = '') { $url = $this->prependScheme($url); return Client::getInstance() ->setConfig($this->config) ->setLastModified($last_modified) ->setEtag($etag) ->setUsername($username) ->setPassword($password) ->execute($url); } /** * Discover and download a feed. * * @param string $url Feed or website url * @param string $last_modified Last modified HTTP header * @param string $etag Etag HTTP header * @param string $username HTTP basic auth username * @param string $password HTTP basic auth password * * @return \PicoFeed\Client\Client */ public function discover($url, $last_modified = '', $etag = '', $username = '', $password = '') { $client = $this->download($url, $last_modified, $etag, $username, $password); // It's already a feed or the feed was not modified if (!$client->isModified() || $this->detectFormat($client->getContent())) { return $client; } // Try to find a subscription $links = $this->find($client->getUrl(), $client->getContent()); if (empty($links)) { throw new SubscriptionNotFoundException('Unable to find a subscription'); } return $this->download($links[0], $last_modified, $etag, $username, $password); } /** * Find feed urls inside a HTML document. 
* * @param string $url Website url * @param string $html HTML content * * @return array List of feed links */ public function find($url, $html) { Logger::setMessage(get_called_class().': Try to discover subscriptions'); $dom = XmlParser::getHtmlDocument($html); $xpath = new DOMXPath($dom); $links = array(); $queries = array( '//link[@type="application/rss+xml"]', '//link[@type="application/atom+xml"]', ); foreach ($queries as $query) { $nodes = $xpath->query($query); foreach ($nodes as $node) { $link = $node->getAttribute('href'); if (!empty($link)) { $feedUrl = new Url($link); $siteUrl = new Url($url); $links[] = $feedUrl->getAbsoluteUrl($feedUrl->isRelativeUrl() ? $siteUrl->getBaseUrl() : ''); } } } Logger::setMessage(get_called_class().': '.implode(', ', $links)); return $links; } /** * Get a parser instance. * * @param string $url Site url * @param string $content Feed content * @param string $encoding HTTP encoding * * @return \PicoFeed\Parser\Parser */ public function getParser($url, $content, $encoding) { $format = $this->detectFormat($content); if (empty($format)) { throw new UnsupportedFeedFormatException('Unable to detect feed format'); } $className = '\PicoFeed\Parser\\'.$format; $parser = new $className($content, $encoding, $url); $parser->setHashAlgo($this->config->getParserHashAlgo()); $parser->setTimezone($this->config->getTimezone()); $parser->setConfig($this->config); return $parser; } /** * Detect the feed format. * * @param string $content Feed content * * @return string */ public function detectFormat($content) { $dom = XmlParser::getHtmlDocument($content); $xpath = new DOMXPath($dom); foreach ($this->formats as $parser_name => $query) { $nodes = $xpath->query($query); if ($nodes->length === 1) { return $parser_name; } } return ''; } /** * Add the prefix "http://" if the end-user just enter a domain name. 
* * @param string $url Url * @retunr string */ public function prependScheme($url) { if (!preg_match('%^https?://%', $url)) { $url = 'http://'.$url; } return $url; } } picoFeed-0.1.18/lib/PicoFeed/Reader/ReaderException.php000066400000000000000000000003071265625172200225470ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://combat.blog.lemonde.fr/2013/08/31/teddy-riner-le-rookie-devenu-rambo/#xtor=RSS-3208', 'body' => array( '//div[@class="entry-content"]', ), 'strip' => array( '//*[contains(@class, "fb-like") or contains(@class, "social")]' ), ) ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/.blogs.nytimes.com.php000066400000000000000000000006661265625172200230160ustar00rootroot00000000000000 array( '%.*%' => array( 'title' => '//header/h1', 'test_url' => 'http://bits.blogs.nytimes.com/2012/01/16/wikipedia-plans-to-go-dark-on-wednesday-to-protest-sopa/', 'body' => array( '//div[@class="postContent"]', ), 'strip' => array( '//*[@class="shareToolsBox"]', ), ) ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/.igen.fr.php000066400000000000000000000005461265625172200207710ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.igen.fr/ailleurs/2014/05/nvidia-va-delaisser-les-smartphones-grand-public-86031', 'body' => array( '//div[contains(@class, "field-name-body")]' ), 'strip' => array( ), ) ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/.nytimes.com.php000066400000000000000000000004211265625172200216760ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.nytimes.com/2011/05/15/world/middleeast/15prince.html', 'body' => array( '//div[@class="articleBody"]', ), ) ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/.phoronix.com.php000066400000000000000000000004641265625172200220630ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.phoronix.com/scan.php?page=article&item=amazon_ec2_bare&num=1', 'body' => array( '//div[@class="content"]', ), 'strip' => array() ) ) ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/.slate.com.php000066400000000000000000000015341265625172200213240ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.slate.com/articles/business/moneybox/2013/08/microsoft_ceo_steve_ballmer_retires_a_firsthand_account_of_the_company_s.html', 'body' => array( '//div[@class="sl-art-body"]', ), 'strip' => array( '//*[contains(@class, "social") or contains(@class, "comments") or contains(@class, "sl-article-floatin-tools") or contains(@class, "sl-art-pag")]', '//*[@id="mys_slate_logged_in"]', '//*[@id="sl_article_tools_myslate_bottom"]', '//*[@id="mys_myslate"]', '//*[@class="sl-viral-container"]', '//*[@class="sl-art-creds-cntr"]', '//*[@class="sl-art-ad-midflex"]', ) ) ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/.theguardian.com.php000066400000000000000000000006731265625172200225120ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.theguardian.com/sustainable-business/2015/feb/02/2015-hyper-transparency-global-business', 'body' => array( '//div[contains(@class, "content__main-column--article")]', ), 'strip' => array( '//div[contains(@class, "meta-container")]', ), ) ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/.wikipedia.org.php000066400000000000000000000020661265625172200221740ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'https://en.wikipedia.org/wiki/Grace_Hopper', 'body' => array( '//div[@id="bodyContent"]', ), 'strip' => array( "//div[@id='toc']", "//div[@id='catlinks']", "//div[@id='jump-to-nav']", "//div[@class='thumbcaption']//div[@class='magnify']", "//table[@class='navbox']", "//table[contains(@class, 'infobox')]", "//div[@class='dablink']", "//div[@id='contentSub']", "//div[@id='siteSub']", "//table[@id='persondata']", "//table[contains(@class, 'metadata')]", "//*[contains(@class, 'noprint')]", "//*[contains(@class, 'printfooter')]", "//*[contains(@class, 'editsection')]", "//*[contains(@class, 'error')]", "//span[@title='pronunciation:']", ), ) ) ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/.wired.com.php000066400000000000000000000021521265625172200213230ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.wired.com/gamelife/2013/09/ouya-free-the-games/', 'body' => array( '//div[@data-js="gallerySlides"]', '//article', ), 'strip' => array( '//*[@id="linker_widget"]', '//*[@class="credit"]', '//div[@data-js="slideCount"]', '//*[contains(@class="visually-hidden")]', '//*[@data-slide-number="_endslate"]', '//*[@id="related"]', '//*[contains(@class, "bio")]', '//*[contains(@class, "entry-footer")]', '//*[contains(@class, "mobify_backtotop_link")]', '//*[contains(@class, "gallery-navigation")]', '//*[contains(@class, "gallery-thumbnail")]', '//img[contains(@src, "1x1")]', '//a[contains(@href, "creativecommons")]', '//a[@href="#start-of-content"]', '//ul[@id="article-tags"]', ), ) ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/.wsj.com.php000066400000000000000000000006521265625172200210170ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://online.wsj.com/article/SB10001424127887324108204579023143974408428.html', 'body' => array( '//div[@class="articlePage"]', ), 'strip' => array( '//*[@id="articleThumbnail_2"]', '//*[@class="socialByline"]', ) ) ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/01net.com.php000066400000000000000000000011461265625172200210640ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.01net.com/editorial/624550/twitter-rachete-madbits-un-specialiste-francais-de-lanalyse-dimages/', 'body' => array( '//div[@class="article_ventre_box"]', ), 'strip' => array( '//link', '//*[contains(@class, "article_navigation")]', '//h1', '//*[contains(@class, "article_toolbarMain")]', '//*[contains(@class, "article_imagehaute_box")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/abstrusegoose.com.php000066400000000000000000000002351265625172200230200ustar00rootroot00000000000000 array( '%.*%' => array( '%alt="(.+)" title="(.+)" */>%' => '/>
$1
$2', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/alainonline.net.php000066400000000000000000000006031265625172200224330ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.alainonline.net/news_details.php?lang=arabic&sid=18907', 'body' => array( '//div[@class="news_details"]', ), 'strip' => array( '//div[@class="news_details"]/div/div[last()]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/aljazeera.com.php000066400000000000000000000012451265625172200220730ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.aljazeera.com/news/2015/09/xi-jinping-seattle-china-150922230118373.html', 'body' => array( '//figure[@class="article-content"]', '//div[@class="article-body"]', ), 'strip' => array( '//h1', '//h3', '//ul', '//table[contains(@class, "in-article-item")]', '//a[@target="_self"]', '//div[@data-embed-type="Brightcove"]', '//div[@class="QuoteContainer"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/allafrica.com.php000066400000000000000000000010501265625172200220450ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.aljazeera.com/news/2015/09/xi-jinping-seattle-china-150922230118373.html', 'body' => array( '//div[@class="story-body"]', ), 'strip' => array( '//p[@class="kindofstory"]', '//cite[@class="byline"]', '//div[contains(@class,"related-topics")]', '//links', '//sharebar', '//related-topics', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/allgemeine-zeitung.de.php000066400000000000000000000015401265625172200235320ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.allgemeine-zeitung.de/lokales/polizei/mainz-gonsenheim-unbekannte-rauben-esso-tankstelle-in-kurt-schumacher-strasse-aus_14913147.htm', 'body' => array( '//div[contains(@class, "article")][1]', ), 'strip' => array( '//read/h1', '//*[@id="t-map"]', '//*[contains(@class, "modules")]', '//*[contains(@class, "adsense")]', '//*[contains(@class, "linkbox")]', '//*[contains(@class, "info")]', 
'//*[@class="skip"]', '//*[@class="funcs"]', '//span[@class="nd address"]', '//a[contains(@href, "abo-und-services")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/amazingsuperpowers.com.php000066400000000000000000000002131265625172200240740ustar00rootroot00000000000000 array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/anythingcomic.com.php000066400000000000000000000005521265625172200227710ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array( '//img[@id="comic_image"]', '//div[@class="comment-wrapper"][position()=1]', ), 'strip' => array(), 'test_url' => 'http://www.anythingcomic.com/comics/2108929/stress-free/', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/ap.org.php000066400000000000000000000006061265625172200205460ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://hosted.ap.org/dynamic/stories/A/AS_CHINA_GAO_ZHISHENG?SITE=AP&SECTION=HOME&TEMPLATE=DEFAULT', 'body' => array( '//img[@class="ap-smallphoto-img"]', '//span[@class="entry-content"]', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/areadvd.de.php000066400000000000000000000004431265625172200213540ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.areadvd.de/news/daily-deals-angebote-bei-lautsprecher-teufel-3/', 'body' => array('//div[contains(@class,"entry")]'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/arstechnica.com.php000066400000000000000000000013641265625172200224230ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://arstechnica.com/tech-policy/2015/09/judge-warners-2m-happy-birthday-copyright-is-bogus/', 'body' => array( '//header/h2', '//section[@id="article-guts"]', '//div[@class="superscroll-content show"]', '//div[@class="gallery"]', ), 'next_page' => '//span[@class="numbers"]/a', 'strip' => array( '//figcaption', '//div[@class="post-meta"]', '//div[@class="gallery-image-credit"]', '//aside', '//div[@class="article-expander"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/awkwardzombie.com.php000066400000000000000000000004231265625172200230000ustar00rootroot00000000000000 array( '%/index.php.*comic=.*%' => array( 'test_url' => 'http://www.awkwardzombie.com/index.php?comic=041315', 'body' => array('//*[@id="comic"]/img'), 
'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/bangkokpost.com.php000066400000000000000000000010461265625172200224560ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.bangkokpost.com/news/politics/704204/new-us-ambassador-arrives-in-bangkok', 'body' => array( '//div[@class="articleContents"]', ), 'strip' => array( '//h2', '//h4', '//div[@class="text-size"]', '//div[@class="relate-story"]', '//div[@class="text-ads"]', '//script', '//ul', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/bgr.com.php000066400000000000000000000005711265625172200207100ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://bgr.com/2015/09/27/iphone-6s-waterproof-testing/', 'body' => array( '//img[contains(@class,"img")]', '//div[@class="text-column"]', ), 'strip' => array( '//strong', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/bigfootjustice.com.php000066400000000000000000000001721265625172200231530ustar00rootroot00000000000000 array( '%.*%' => array( '%-150x150%' => '', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/bizjournals.com.php000066400000000000000000000005501265625172200224750ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.bizjournals.com/milwaukee/news/2015/09/30/bucks-will-hike-prices-on-best-seats-at-new-arena.html', 'body' => array( '//figure/div/a/img', '//p[@class="content__segment"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/blog.fefe.de.php000066400000000000000000000004231265625172200215730ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://blog.fefe.de/?ts=ad706a73', 'body' => array( '/html/body/ul', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/blog.mapillary.com.php000066400000000000000000000004531265625172200230510ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://blog.mapillary.com/update/2015/08/26/traffic-sign-updates.html', 'body' => array( '//div[contains(@class, "blog-post__content")]', 
), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/buenosairesherald.com.php000066400000000000000000000010761265625172200236360ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.buenosairesherald.com/article/199344/manzur-named-next-governor-of-tucum%C3%A1n', 'body' => array( '//div[@style="float:none"]', ), 'strip' => array( '//div[contains(@class, "bz_alias_short_desc_container"]', '//td[@id="bz_show_bug_column_1"]', '//table[@id="attachment_table"]', '//table[@class="bz_comment_table"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/bunicomic.com.php000066400000000000000000000004511265625172200221030ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.bunicomic.com/comic/buni-623/', 'body' => array( '//div[@class="comic-table"]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/buttersafe.com.php000066400000000000000000000005261265625172200223020ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://buttersafe.com/2015/04/21/the-incredible-flexible-man/', 'body' => array( '//div[@id="comic"]', '//div[@class="post-comic"]', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/cad-comic.com.php000066400000000000000000000004311265625172200217500ustar00rootroot00000000000000 array( '%/cad/.+%' => array( 'test_url' => 'http://www.cad-comic.com/cad/20150417', 'body' => array( '//*[@id="content"]/img', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/chaoslife.findchaos.com.php000066400000000000000000000003731265625172200240300ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://chaoslife.findchaos.com/pets-in-the-wild', 'body' => array('//div[@id="comic"]'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/cliquerefresh.com.php000066400000000000000000000004641265625172200230000ustar00rootroot00000000000000 array( '%/comic.*%' => array( 'test_url' => 'http://cliquerefresh.com/comic/078-stating-the-obvious/', 
'body' => array('//div[@class="comicImg"]/img | //div[@class="comicImg"]/a/img'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/cnet.com.php000066400000000000000000000027701265625172200210720ustar00rootroot00000000000000 array( '%^/products.*%' => array( 'test_url' => 'http://www.cnet.com/products/fibaro-flood-sensor/#ftag=CADf328eec', 'body' => array( '//li[contains(@class,"slide first"] || //figure[contains(@class,(promoFigure))]', '//div[@class="quickInfo"]', '//div[@class="col-6 ratings"]', '//div[@id="editorReview"]', ), 'strip' => array( '//script', '//a[@class="clickToEnlarge"]', '//div[@section="topSharebar"]', '//div[contains(@class,"related")]', '//div[contains(@class,"ad-")]', '//div[@section="shortcodeGallery"]', ), ), '%.*%' => array( 'test_url' => 'http://cnet.com.feedsportal.com/c/34938/f/645093/s/4a340866/sc/28/l/0L0Scnet0N0Cnews0Cman0Eclaims0Eonline0Epsychic0Emade0Ehim0Ebuy0E10Emillion0Epowerball0Ewinning0Eticket0C0Tftag0FCAD590Aa51e/story01.htm', 'body' => array( '//p[@itemprop="description"]', '//div[@itemprop="articleBody"]', ), 'strip' => array( '//script', '//a[@class="clickToEnlarge"]', '//div[@section="topSharebar"]', '//div[contains(@class,"related")]', '//div[contains(@class,"ad-")]', '//div[@section="shortcodeGallery"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/consomac.fr.php000066400000000000000000000005231265625172200215660ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://consomac.fr/news-2430-l-iphone-6-toujours-un-secret-bien-garde.html', 'body' => array( '//div[contains(@id, "newscontent")]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/cowbirdsinlove.com.php000066400000000000000000000002131265625172200231600ustar00rootroot00000000000000 array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/csmonitor.com.php000066400000000000000000000011071265625172200221470ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.csmonitor.com/USA/Politics/2015/0925/John-Boehner-steps-down-Self-sacrificing-but-will-it-lead-to-better-government', 'body' => array( '//figure[@id="image-top-1"]', '//div[@id="story-body"]', ), 'strip' => array( '//script', '//img[@title="hide caption"]', '//*[contains(@class,"promo_link")]', '//div[@id="story-embed-column"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/dailyjs.com.php000066400000000000000000000010771265625172200215770ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://dailyjs.com/2014/08/07/p5js/', 'body' => array( '//div[@id="post"]', ), 'strip' => array( '//h2[@class="post"]', '//div[@class="meta"]', '//*[contains(@class, "addthis_toolbox")]', '//*[contains(@class, "addthis_default_style")]', '//*[@class="navigation small"]', '//*[@id="related"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/dailyreporter.com.php000066400000000000000000000007321265625172200230220ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://dailyreporter.com/2016/01/09/us-supreme-court-case-could-weaken-government-workers-unions/', 'body' => array( '//div[contains(@class, "entry-content")]', ), 'strip' => array( '//div[@class="dmcss_login_form"]', '//*[contains(@class, "sharedaddy")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/dailytech.com.php000066400000000000000000000005741265625172200221070ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.dailytech.com/Apples+First+Fixes+to+iOS+9+Land+w+iOS++901+Release/article37495.htm', 'body' => array( '//div[@class="NewsBodyImage"]', '//span[@id="lblSummary"]', '//span[@id="lblBody"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/degroupnews.com.php000066400000000000000000000006261265625172200225010ustar00rootroot00000000000000 array( '%.*%' => 
array( 'test_url' => 'http://www.degroupnews.com/medias/vodsvod/amazon-concurrence-la-chromecast-de-google-avec-fire-tv-stick', 'body' => array( '//div[@class="contenu"]', ), 'strip' => array( '//div[contains(@class, "a2a")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/derstandard.at.php000066400000000000000000000006151265625172200222560ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://derstandard.at/2000010267354/The-Witcher-3-Hohe-Hardware-Anforderungen-fuer-PC-Spieler?ref=rss', 'body' => array( '//div[@class="copytext"]', '//ul[@id="media-list"]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/dilbert.com.php000066400000000000000000000004041265625172200215560ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array( '//img[@class="img-responsive img-comic"]', ), 'test_url' => 'http://dilbert.com/strip/2016-01-28', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/discovermagazine.com.php000066400000000000000000000007611265625172200234710ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://blogs.discovermagazine.com/the-extremo-files/2015/09/11/have-scientists-found-the-worlds-deepest-fish/', 'body' => array( '//div[@class="entry"]', ), 'strip' => array( '//h1', '//div[@class="meta"]', '//div[@class="shareIcons"]', '//div[@class="navigation"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/distrowatch.com.php000066400000000000000000000004441265625172200224700ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://distrowatch.com/?newsid=08355', 'body' => array( '//td[@class="NewsText"][1]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/dozodomo.com.php000066400000000000000000000006361265625172200217720ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://dozodomo.com/bento/2014/03/04/lart-des-maki-de-takayo-kiyota/', 'body' => array( '//div[@class="joke"]', '//div[@class="story-cover"]', '//div[@class="story-content"]', 
), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/drawingboardcomic.com.php000066400000000000000000000005641265625172200236160ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array('//img[@id="comicimage"]'), 'strip' => array(), 'test_url' => 'http://drawingboardcomic.com/index.php?comic=208', ), ), 'filter' => array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/endlessorigami.com.php000066400000000000000000000001721265625172200231400ustar00rootroot00000000000000 array( '%.*%' => array( '%-150x150%' => '', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/engadget.com.php000066400000000000000000000005201265625172200217060ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.engadget.com/2015/04/20/dark-matter-discovery/?ncid=rss_truncated', 'body' => array('//div[@id="page_body"]/div[@class="container@m-"]'), 'strip' => array('//aside[@role="banner"]'), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/escapistmagazine.com.php000066400000000000000000000061771265625172200234750ustar00rootroot00000000000000 array( '%/articles/view/comicsandcosplay/comics/critical-miss.*%' => array( 'body' => array('//*[@class="body"]/span/img | //div[@class="folder_nav_links"]/following::p'), 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/critical-miss/13776-Critical-Miss-on-Framerates?utm_source=rss&utm_medium=rss&utm_campaign=articles', 'strip' => array(), ), '%/articles/view/comicsandcosplay/comics/namegame.*%' => array( 'body' => array('//*[@class="body"]/span/p/img[@height != "120"]'), 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/namegame/9759-Leaving-the-Nest?utm_source=rss&utm_medium=rss&utm_campaign=articles', 'strip' => array(), ), '%/articles/view/comicsandcosplay/comics/stolen-pixels.*%' => array( 'body' => array('//*[@class="body"]/span/p[2]/img'), 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/stolen-pixels/8866-Stolen-Pixels-258-Where-the-Boys-Are?utm_source=rss&utm_medium=rss&utm_campaign=articles', 'strip' => array(), ), '%/articles/view/comicsandcosplay/comics/bumhugparade.*%' => array( 'body' => array('//*[@class="body"]/span/p[2]/img'), 'test_url' => 
'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/bumhugparade/8262-Bumhug-Parade-13?utm_source=rss&utm_medium=rss&utm_campaign=articles', 'strip' => array(), ), '%/articles/view/comicsandcosplay.*/comics/escapistradiotheater%' => array( 'body' => array('//*[@class="body"]/span/p[2]/img'), 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/escapistradiotheater/8265-The-Escapist-Radio-Theater-13?utm_source=rss&utm_medium=rss&utm_campaign=articles', 'strip' => array(), ), '%/articles/view/comicsandcosplay/comics/paused.*%' => array( 'body' => array('//*[@class="body"]/span/p[2]/img | //*[@class="body"]/span/div/img'), 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/paused/8263-Paused-16?utm_source=rss&utm_medium=rss&utm_campaign=articles', 'strip' => array(), ), '%/articles/view/comicsandcosplay/comics/fraughtwithperil.*%' => array( 'body' => array('//*[@class="body"]'), 'test_url' => 'http://www.escapistmagazine.com/articles/view/comicsandcosplay/comics/fraughtwithperil/12166-The-Escapist-Presents-Escapist-Comics-Critical-Miss-B-lyeh-Fhlop?utm_source=rss&utm_medium=rss&utm_campaign=articles', 'strip' => array(), ), '%/articles/view/video-games/columns/.*%' => array( 'body' => array('//*[@id="article_content"]'), 'test_url' => 'http://www.escapistmagazine.com/articles/view/video-games/columns/experienced-points/13971-What-50-Shades-and-Batman-Have-in-Common.2', 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/espn.go.com.php000066400000000000000000000004111265625172200215000ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://espn.go.com/nfl/story/_/id/13388208/jason-whitlock-chip-kelly-controversy', 'body' => array( '//p', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/exocomics.com.php000066400000000000000000000005401265625172200221230ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array('//a[@class="comic"]/img'), 
'strip' => array(), 'test_url' => 'http://www.exocomics.com/379', ), ), 'filter' => array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/explosm.net.php000066400000000000000000000004411265625172200216310ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://explosm.net/comics/3803/', 'body' => array( '//div[@id="comic-container"]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/extrafabulouscomics.com.php000066400000000000000000000001731265625172200242160ustar00rootroot00000000000000 array( '%.*%' => array( '%-150x150%' => '', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/fastcodesign.com.php000066400000000000000000000005531265625172200226070ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.fastcodesign.com/3026548/exposure/peek-inside-the-worlds-forbidden-subway-tunnels', 'body' => array( '//article[contains(@class, "body prose")]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/fastcoexist.com.php000066400000000000000000000006111265625172200224650ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.fastcoexist.com/3026114/take-a-seat-on-this-gates-funded-future-toilet-that-will-change-how-we-think-about-poop', 'body' => array( '//article[contains(@class, "body prose")]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/fastcompany.com.php000066400000000000000000000005541265625172200224630ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.fastcompany.com/3026712/fast-feed/elon-musk-an-apple-tesla-merger-is-very-unlikely', 'body' => array( '//article[contains(@class, "body prose")]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/ffworld.com.php000066400000000000000000000004611265625172200215770ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.ffworld.com/?rub=news&page=voir&id=2709', 'body' => array( '//div[@class="news_body"]', ), 'strip' => array( ), ), ), ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/foreignpolicy.com.php000066400000000000000000000012761265625172200230120ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://foreignpolicy.com/2016/01/09/networking-giant-pulls-nsa-linked-code-exploited-by-hackers/', 'body' => array( '//article', ), 'strip' => array( '//div[@id="post-category"]', '//div[@id="desktop-right"]', '//h1', '//section[@class="article-meta"]', '//div[@class="side-panel-wrapper"]', '//*[contains(@class, "share-")]', '//*[contains(@id, "taboola-")]', '//div[@class="comments"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/fossbytes.com.php000066400000000000000000000014431265625172200221560ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://fossbytes.com/fbi-hacked-1000-computers-to-shut-down-largest-child-pornography-site-on-the-dark-web/', 'body' => array( '//div[@class="entry-inner"]', ), 'strip' => array( '//*[@class="at-above-post addthis_default_style addthis_toolbox at-wordpress-hide"]', '//*[@class="at-below-post addthis_default_style addthis_toolbox at-wordpress-hide"]', '//*[@class="at-below-post-recommended addthis_default_style addthis_toolbox at-wordpress-hide"]', '//*[@class="code-block code-block-12 ai-desktop"]', '//*[@class="code-block code-block-13 ai-tablet-phone"]', ), ), ), );picoFeed-0.1.18/lib/PicoFeed/Rules/fowllanguagecomics.com.php000066400000000000000000000004311265625172200240020ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array('//*[@id="comic"] | //*[@class="post-image"]'), 'strip' => array(), 'test_url' => 'http://www.fowllanguagecomics.com/comic/working-out/', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/geek.com.php000066400000000000000000000006741265625172200210550ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.geek.com/news/the-11-best-ways-to-eat-eggs-1634076/', 'body' => array( '//div[@class="articleinfo"]/figure', '//div[@class="articleinfo"]/article', 
'//span[@class="by"]', ), 'strip' => array( '//span[@class="red"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/gerbilwithajetpack.com.php000066400000000000000000000004551265625172200240020ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array( '//div[@id="comic-1"]', '//div[@class="entry"]', ), 'test_url' => 'http://gerbilwithajetpack.com/passing-the-digital-buck/', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/giantitp.com.php000066400000000000000000000004221265625172200217500ustar00rootroot00000000000000 array( '%/comics/oots.*%' => array( 'test_url' => 'http://www.giantitp.com/comics/oots0989.html', 'body' => array( '//td[@align="center"]/img', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/github.com.php000066400000000000000000000005301265625172200214130ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'https://github.com/audreyr/favicon-cheat-sheet', 'body' => array( '//article[contains(@class, "entry-content")]', ), 'strip' => array( '//h1', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/gocomics.com.php000066400000000000000000000004431265625172200217370ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.gocomics.com/pearlsbeforeswine/2015/05/30', 'body' => array( '//div[1]/p[1]/a[1]/img', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/golem.de.php000066400000000000000000000005461265625172200210550ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.golem.de/news/breko-telekom-verzoegert-gezielt-den-vectoring-ausbau-1311-102974.html', 'body' => array( '//header[@class="cluster-header"]', '//div[@class="formatted"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/greekculture.com.php000066400000000000000000000004211265625172200226310ustar00rootroot00000000000000 array( '%/joyoftech/.*%' => array( 'body' => array( '//img[@width="640"]', ), 'test_url' => 'http://www.geekculture.com/joyoftech/joyarchives/2235.html', ), ), ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/happletea.com.php000066400000000000000000000007151265625172200221010ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array( '//div[@id="comic"]', '//div[@class="entry"]', ), 'strip' => array('//div[@class="ssba"]'), 'test_url' => 'http://www.happletea.com/comic/mans-best-friend/', ), ), 'filter' => array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/heise.de.php000066400000000000000000000005751265625172200210510ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.heise.de/security/meldung/BND-300-Millionen-Euro-fuer-Fruehwarnsystem-gegen-Cyber-Attacken-2192237.html', 'body' => array( '//div[@class="meldung_wrapper"]', '//div[@class="artikel_content"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/huffingtonpost.com.php000066400000000000000000000005311265625172200232070ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.huffingtonpost.com/2014/02/20/centscere-social-media-syracuse_n_4823848.html', 'body' => array( '//article[@class="content")]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/imogenquest.net.php000066400000000000000000000002131265625172200224770ustar00rootroot00000000000000 array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/ing.dk.php000066400000000000000000000005571265625172200205370ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://ing.dk/artikel/smart-husisolering-og-styring-skal-mindske-japans-energikrise-164517', 'body' => array( '//section[contains(@class, "teaser")]', '//section[contains(@class, "body")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/invisiblebread.com.php000066400000000000000000000003221265625172200231120ustar00rootroot00000000000000 array( '%.*%' => array( '%()%' => '$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/ir.amd.com.php000066400000000000000000000004321265625172200213040ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array('//span[@class="ccbnTxt"]'), 'strip' => array(), 'test_url' => 'http://ir.amd.com/phoenix.zhtml?c=74093&p=RssLanding&cat=news&id=2055819', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/japantimes.co.jp.php000066400000000000000000000012721265625172200225230ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.japantimes.co.jp/news/2015/09/27/world/social-issues-world/pope-meets-sex-abuse-victims-philadelphia-promises-accountability/', 'body' => array( '//article[@role="main"]', ), 'strip' => array( '//script', '//header', '//div[contains(@class, "meta")]', '//div[@class="clearfix"]', '//div[@class="OUTBRAIN"]', '//ul[@id="content_footer_menu"]', '//div[@class="article_footer_ad"]', '//div[@id="disqus_thread"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/japantoday.com.php000066400000000000000000000006311265625172200222650ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.japantoday.com/category/politics/view/japan-u-s-to-sign-new-base-environment-pact', 'body' => array( '//div[@id="article_container"]', ), 'strip' => array( '//h2', '//div[@id="article_info"]', ), ), ), ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/journaldugeek.com.php000066400000000000000000000004441265625172200227740ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www./2014/05/20/le-playstation-now-arrive-en-beta-fermee-aux-etats-unis/', 'body' => array( '//div[@class="post-content"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/jsonline.com.php000066400000000000000000000014601265625172200217550ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.jsonline.com/news/usandworld/as-many-as-a-million-expected-for-popes-last-mass-in-us-b99585180z1-329688131.html', 'body' => array( '//div[@id="mainContent"]', ), 'strip' => array( '//script', '//h1', '//h4[@class="credit"]', '//div[@class="columnist_container"]', '//div[@class="storyTimestamp"]', '//ul[@id="sharing-tools"]', '//div[@class="title"]', '//img[@class="floatLeft"]', '//div[@class="first feature"]', '//div[@class="collateral_article_content"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/kanpai.fr.php000066400000000000000000000005021265625172200212240ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.kanpai.fr/japon/comment-donner-lheure-en-japonais.html', 'body' => array( '//div[@class="single-left"]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/karriere.jobfinder.dk.php000066400000000000000000000006011265625172200235150ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://karriere.jobfinder.dk/artikel/dansk-professor-skal-lede-smart-grid-forskning-20-millioner-dollars-763', 'body' => array( '//section[contains(@class, "teaser")]', '//section[contains(@class, "body")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/koreaherald.com.php000066400000000000000000000004061265625172200224140ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.koreaherald.com/view.php?ud=20150926000018', 'body' => array( '//div[@class="content_view"]', ), ), ), ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/koreatimes.php000066400000000000000000000006121265625172200215200ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.koreatimes.co.kr/www/news/nation/2015/12/116_192409.html', 'body' => array( '//div[@id="p"]', ), 'strip' => array( '//script', '//div[@id="webtalks_btn_listenDiv"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/lastplacecomics.com.php000066400000000000000000000001731265625172200233020ustar00rootroot00000000000000 array( '%.*%' => array( '%-150x150%' => '', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/lejapon.fr.php000066400000000000000000000010221265625172200214070ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://lejapon.fr/guide-voyage-japon/5223/tokyo-sous-la-neige.htm', 'body' => array( '//div[@class="entry"]', ), 'strip' => array( '//*[contains(@class, "addthis_toolbox")]', '//*[contains(@class, "addthis_default_style")]', '//*[@class="navigation small"]', '//*[@id="related"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/lesjoiesducode.fr.php000066400000000000000000000005431265625172200227670ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://lesjoiesducode.fr/post/75576211207/quand-lappli-ne-fonctionne-plus-sans-aucune-raison', 'body' => array( '//div[@class="blog-post-content"]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/lfg.co.php000066400000000000000000000006321265625172200205270ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.lfg.co/page/871/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+LookingForGroup+%28Looking+For+Group%29&utm_content=FeedBurner', 'body' => array( '//*[@id="comic"]/img | //*[@class="content"]', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/lifehacker.com.php000066400000000000000000000010641265625172200222310ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 
'http://lifehacker.com/bring-water-bottle-caps-into-concerts-to-protect-your-d-1269334973', 'body' => array( '//div[contains(@class, "row")/img', '//div[contains(@class, "content-column")]', ), 'strip' => array( '//*[contains(@class, "meta")]', '//span[contains(@class, "icon")]', '//h1', '//aside', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/linux.org.php000066400000000000000000000005371265625172200213100ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.linux.org/threads/lua-the-scripting-interpreter.8352/', 'body' => array( '//div[@class="messageContent"]', ), 'strip' => array( '//aside', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/linuxinsider.com.php000066400000000000000000000010651265625172200226520ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.linuxinsider.com/story/82526.html?rss=1', 'body' => array( '//div[@id="story"]', ), 'strip' => array( '//script', '//h1', '//div[@id="story-toolbox1"]', '//div[@id="story-byline"]', '//div[@id="story"]/p', '//div[@class="story-advertisement"]', '//iframe', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/lists.php000066400000000000000000000004711265625172200205160ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://lists.freebsd.org/pipermail/freebsd-announce/2013-September/001504.html', 'body' => array( '//pre', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/loadingartist.com.php000066400000000000000000000001721265625172200227770ustar00rootroot00000000000000 array( '%.*%' => array( '%-150x150%' => '', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/loldwell.com.php000066400000000000000000000003621265625172200217520ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://loldwell.com/?comic=food-math-101', 'body' => array('//*[@id="comic"]'), 'strip' => array(), ), ), ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/lukesurl.com.php000066400000000000000000000005711265625172200220040ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array('//div[@id="comic"]//img'), 'strip' => array(), 'test_url' => 'http://www.lukesurl.com/archives/comic/665-3-of-clubs', ), ), 'filter' => array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/macg.co.php000066400000000000000000000005411265625172200206650ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.macg.co//logiciels/2014/05/feedly-sameliore-un-petit-peu-sur-mac-82205', 'body' => array( '//div[contains(@class, "field-name-body")]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/marc.info.php000066400000000000000000000004411265625172200212310ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://marc.info/?l=openbsd-misc&m=141987113202061&w=2', 'body' => array( '//pre', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/marriedtothesea.com.php000066400000000000000000000004401265625172200233110ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.marriedtothesea.com/index.php?date=052915', 'body' => array( '//div[@align]/a/img', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/marycagle.com.php000066400000000000000000000005271265625172200221030ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array( '//img[@id="cc-comic"]', '//div[@class="cc-newsbody"]', ), 'strip' => array(), 'test_url' => 'http://www.marycagle.com/letsspeakenglish/74-grim-reality/', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/maximumble.thebookofbiff.com.php000066400000000000000000000004211265625172200250760ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://maximumble.thebookofbiff.com/2015/04/20/1084-change/', 'body' => array('//div[@id="comic"]/div/a/img'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/medium.com.php000066400000000000000000000004771265625172200214230ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'https://medium.com/lessons-learned/917b8b63ae3e', 'body' => array( '//div[contains(@class, "post-field body")]', ), 'strip' => array( ), ), ), ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/mercworks.net.php000066400000000000000000000007211265625172200221570ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array('//div[@id="comic"]', '//div[contains(@class,"entry-content")]', ), 'strip' => array(), 'test_url' => 'http://mercworks.net/comicland/healthy-choice/', ), ), 'filter' => array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/metronieuws.nl.php000066400000000000000000000006171265625172200223530ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.metronieuws.nl/sport/2015/04/broer-fellaini-zorgde-bijna-voor-paniek-bij-mourinho', 'body' => array('//div[contains(@class,"article-top")]/div[contains(@class,"image-component")] | //div[@class="article-full-width"]/div[1]'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/milwaukeenns.php000066400000000000000000000006301265625172200220570ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://milwaukeenns.org/2016/01/08/united-way-grant-enables-sdc-to-restore-free-tax-assistance-program/', 'body' => array( '//div[@class="pf-content"]', ), 'strip' => array( '//div[@class="printfriendly"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/mlb.com.php000066400000000000000000000010341265625172200207030ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://m.brewers.mlb.com/news/article/161364798', 'body' => array( '//article', ), 'strip' => array( '//div[@class="article-top"]', '//div[contains(@class, "contributor-bottom")]', '//p[@class="tagline"]', '//div[contains(@class, "social-")]', '//div[@class="button-wrap"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/mokepon.smackjeeves.com.php000066400000000000000000000004751265625172200241100ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://mokepon.smackjeeves.com/comics/2120096/chapter-9-page-68/', 'body' => array('//*[@id="comic_area_inner"]/img | //*[@id="comic_area_inner"]/a/img'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/monwindowsphone.com.php000066400000000000000000000005461265625172200233760ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.monwindowsphone.com/tout-savoir-sur-le-centre-d-action-de-windows-phone-8-1-t40574.html', 'body' => array( '//div[@class="blog-post-body"]', ), 
'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/mrlovenstein.com.php000066400000000000000000000002631265625172200226610ustar00rootroot00000000000000 array( '%.*%' => array( '%alt="(.+)" */>%' => '/>
$1', '%\.png%' => '_rollover.png', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/muckrock.com.php000066400000000000000000000006531265625172200217550ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'https://www.muckrock.com/news/archives/2016/jan/13/5-concerns-private-prisons/', 'body' => array( '//div[@class="content"]', ), 'strip' => array( '//h1', '//div[@class="secondary"]', '//aside', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/nationaljournal.com.php000066400000000000000000000007271265625172200233410ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.nationaljournal.com/s/354962/south-carolina-evangelicals-outstrip-establishment?mref=home_top_main', 'body' => array( '//div[@class="section-body"]', ), 'strip' => array( '//*[contains(@class, "-related")]', '//*[contains(@class, "social")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/nature.com.php000066400000000000000000000004701265625172200214320ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.nature.com/doifinder/10.1038/nature.2015.18340', 'body' => array( '//div[contains(@class,"main-content")]', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/nba.com.php000066400000000000000000000007021265625172200206720ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.nba.com/2015/news/09/25/knicks-jackson-to-spend-more-time-around-coaching-staff.ap/index.html?rss=true', 'body' => array( '//section[@id="nbaArticleContent"]', ), 'strip' => array( '//div[@id="nbaArticleSocialWrapper_bot"]', '//h5', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/nedroid.com.php000066400000000000000000000002131265625172200215530ustar00rootroot00000000000000 array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/networkworld.com.php000066400000000000000000000012421265625172200226730ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.networkworld.com/article/3020585/security/the-incident-response-fab-five.html', 'body' => array( '//figure/img[@class="hero-img"]', '//section[@class="deck"]', '//div[@itemprop="articleBody"] | //div[@itemprop="reviewBody"]', '//div[@class="carousel-inside-crop"]', ), 'strip' => array( '//script', '//aside', '//div[@class="credit"]', '//div[@class="view-large"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/neustadt-ticker.de.php000066400000000000000000000006761265625172200230640ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.neustadt-ticker.de/41302/alltag/kultur/demo-auf-der-boehmischen', 'body' => array( '//div[@class="entry-content"]', ), 'strip' => array( '//*[contains(@class, "sharedaddy")]', '//*[contains(@class, "yarpp-related")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/news.sciencemag.org.php000066400000000000000000000010661265625172200232200ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://news.sciencemag.org/biology/2015/09/genetic-engineering-turns-common-plant-cancer-fighter', 'body' => array( '//div[@class="content"]', ), 'strip' => array( '//h1[@class="snews-article__headline"]', '//div[contains(@class,"easy_social_box")]', '//div[@class="author-teaser"]', '//div[@class="article-byline"]', ), ), ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/niceteethcomic.com.php000066400000000000000000000004131265625172200231140ustar00rootroot00000000000000 array( '%/archives.*%' => array( 'test_url' => 'http://niceteethcomic.com/archives/page119/', 'body' => array('//*[@class="comicpane"]/a/img'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/nichtlustig.de.php000066400000000000000000000003341265625172200223020ustar00rootroot00000000000000 array( '%.*%' => array( 
'%.*static.nichtlustig.de/comics/full/(\\d+).*%s' => '', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/oglaf.com.php000066400000000000000000000007401265625172200212240ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array( '//img[@id="strip"]', '//a/div[@id="nx"]/..', ), 'strip' => array(), 'test_url' => 'http://oglaf.com/slodging/', ), ), 'filter' => array( '%.*%' => array( '%alt="(.+)" title="(.+)" */>%' => '/>
$1
$2
', '%%' => 'Next page', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/onhax.net.php000066400000000000000000000006501265625172200212610ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://onhax.net/process-lasso-8-9-1-4-pro-key-portable-is-here-latest', 'body' => array( '//div[@class="postcontent"]', ), 'strip' => array( '//*[@class="sharedaddy sd-sharing-enabled"]', '//*[@class="yarpp-related"]', ), ), ), );picoFeed-0.1.18/lib/PicoFeed/Rules/onmilwaukee.php000066400000000000000000000014511265625172200216770ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://onmilwaukee.com/movies/articles/downerspelunking.html', 'body' => array( '//article[contains(@class, "show")]', ), 'strip' => array( '//h1', '//div[contains(@class, "-ad")]', '//div[contains(@class, "_ad")]', '//div[@id="pub_wrapper"]', '//div[contains(@class, "share_tools")]', '//div[@class="clearfix"]', '//div[contains(@class,"image_control")]', '//section[@class="ribboned"]', '//div[contains(@class,"sidebar")]', '//aside[@class="article_tag_list"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/openrightsgroup.org.php000066400000000000000000000011621265625172200234030ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'https://www.openrightsgroup.org/blog/2014/3-days-to-go-till-orgcon2014', 'body' => array( '//div[contains(@class, "content")]/div', ), 'strip' => array( '//h2[1]', '//div[@class="info"]', '//div[@class="tags"]', '//div[@class="comments"]', '//div[@class="breadcrumbs"]', '//h1[@class="pageTitle"]', '//p[@class="bookmarkThis"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/opensource.com.php000066400000000000000000000005441265625172200223200ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://opensource.com/life/15/10/how-internet-things-will-change-way-we-think', 'body' => array( '//img[@class="image-full-size"]', '//div[contains(@class="field-type-text-with-summary")]', ), ) ) ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/optipess.com.php000066400000000000000000000002131265625172200217750ustar00rootroot00000000000000 array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/osnews.com.php000066400000000000000000000004511265625172200214510ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://osnews.com/story/28863/Google_said_to_be_under_US_antitrust_scrutiny_over_Android', 'body' => array( '//div[@class="newscontent1"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/pastebin.com.php000066400000000000000000000004261265625172200217420ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://pastebin.com/ed1pP9Ak', 'body' => array( '//div[@class="text"]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/peebleslab.com.php000066400000000000000000000003221265625172200222260ustar00rootroot00000000000000 array( '%.*%' => array( // the extra space is required to strip the title cleanly '%title="(.+) " */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/penny-arcade.com.php000066400000000000000000000010661265625172200225040ustar00rootroot00000000000000 array( '%/news/.*%' => array( 'test_url' => 'http://penny-arcade.com/news/post/2015/04/15/101-part-two', 'body' => array( '//*[@class="postBody"]/*', ), 'strip' => array( ), ), '%/comic/.*%' => array( 'test_url' => 'http://penny-arcade.com/comic/2015/04/15', 'body' => array( '//*[@id="comicFrame"]/a/img', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/pixelbeat.org.php000066400000000000000000000004711265625172200221230ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.pixelbeat.org/programming/sigpipe_handling.html#1425573246', 'body' => array( '//div[@class="contentText"]', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/plus.google.com.php000066400000000000000000000004371265625172200223750ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'https://plus.google.com/+LarryPage/posts/Lh8SKC6sED1', 'body' => array( '//div[@role="article"]/div[contains(@class, "eE")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/popstrip.com.php000066400000000000000000000002311265625172200220070ustar00rootroot00000000000000 array( '%.*%' => array( '%( '$1$2$1bonus.png"/>', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/putaindecode.fr.php000066400000000000000000000007201265625172200224270ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://putaindecode.fr/posts/js/etat-lieux-js-modulaire-front/', 'body' => array( '//*[@class="putainde-Post-md"]', ), 'strip' => array( '//*[contains(@class, "inlineimg")]', '//*[contains(@class, "comment-respond")]', '//header', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/recode.net.php000066400000000000000000000012741265625172200214100ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 
'http://recode.net/2015/09/26/big-tech-rolls-out-red-carpet-for-indian-prime-minister-lobbies-behind-closed-doors/', 'body' => array( '//img[contains(@class,"attachment-large")]', '//div[contains(@class,"postarea")]', '//li[@class,"author"]', ), 'strip' => array( '//script', '//div[contains(@class,"sharedaddy")]', '//div[@class="post-send-off"]', '//div[@class="large-12 columns"]', '//div[contains(@class,"inner-related-article")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/retractionwatch.com.php000066400000000000000000000010371265625172200233350ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://retractionwatch.com/2015/11/12/psychologist-jens-forster-settles-case-by-agreeing-to-2-retractions/', 'body' => array( '//*[@class="main"]', '//*[@class="entry-content"]', ), 'strip' => array( '//*[contains(@class, "sharedaddy")]', '//*[contains(@class, "jp-relatedposts")]', '//p[@class="p1"]', ) ) ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/rue89.nouvelobs.com.php000066400000000000000000000007241265625172200231250ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://rue89.feedsportal.com/c/33822/f/608948/s/30999fa0/sc/24/l/0L0Srue890N0C20A130C0A80C30A0Cfaisait0Eboris0Eboillon0Eex0Esarko0Eboy0E350A0E0A0A0A0Eeuros0Egare0Enord0E245315/story01.htm', 'body' => array( '//*[@id="article"]/div[contains(@class, "content")]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/satwcomic.com.php000066400000000000000000000005271265625172200221300ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://satwcomic.com/day-at-the-beach', 'body' => array( '//div[@class="container"]/center/a/img', '//span[@itemprop="articleBody"]', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/scrumalliance.org.php000066400000000000000000000005511265625172200227670ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 
'https://www.scrumalliance.org/community/articles/2015/march/an-introduction-to-agile-project-intake?feed=articles', 'body' => array( '//div[@class="article_content"]', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/securityfocus.com.php000066400000000000000000000004001265625172200230340ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.securityfocus.com/news/11569?ref=rss', 'body' => array( '//div[@class="expanded"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/sentfromthemoon.com.php000066400000000000000000000006741265625172200233710ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array( '//div[@class="comicpane"]/a/img', '//div[@class="entry"]', ), 'strip' => array(), 'test_url' => 'http://sentfromthemoon.com/archives/1417', ), ), 'filter' => array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/sitepoint.com.php000066400000000000000000000004761265625172200221600ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.sitepoint.com/creating-hello-world-app-swift/', 'body' => array( '//section[@class="article_body"]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/slashdot.org.php000066400000000000000000000006211265625172200217640ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://science.slashdot.org/story/15/04/20/0528253/pull-top-can-tabs-at-50-reach-historic-archaeological-status', 'body' => array( '//article/div[@class="body"] | //article[@class="layout-article"]/div[@class="elips"]', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/smallhousebliss.com.php000066400000000000000000000011531265625172200233440ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://smallhousebliss.com/2013/08/29/house-g-by-lode-architecture/', 'body' => array( '//div[@class="post-content"]', ), 'strip' => array( '//*[contains(@class, "gallery")]', '//*[contains(@class, "share")]', '//*[contains(@class, "wpcnt")]', '//*[contains(@class, "meta")]', '//*[contains(@class, "postitle")]', '//*[@id="nav-below"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/smarthomewelt.de.php000066400000000000000000000005201265625172200226350ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://smarthomewelt.de/apple-tv-amazon-echo-smart-home/', 'body' => array('//div[@class="entry-inner"]/p | //div[@class="entry-inner"]/div[contains(@class,"wp-caption")]'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/smashingmagazine.com.php000066400000000000000000000004731265625172200234640ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.smashingmagazine.com/2015/04/17/using-sketch-for-responsive-web-design-case-study/', 'body' => array('//article[contains(@class,"post")]/p'), 'strip' 
=> array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/smbc-comics.com.php000066400000000000000000000002211265625172200223250ustar00rootroot00000000000000 array( '%.*%' => array( '%()%' => '$1$2$1after$2', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/soundandvision.com.php000066400000000000000000000011721265625172200231770ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.soundandvision.com/content/james-guthrie-mixing-roger-waters-and-pink-floyd-51', 'body' => array( '//div[@id="left"]', ), 'strip' => array( '//div[@class="meta"]', '//div[@class="ratingsbox"]', '//h1', '//h2', '//addthis', '//comment-links', '//div[@class="book-navigation"]', '//div[@class="comment-links"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/spiegel.de.php000066400000000000000000000005201265625172200213720ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.spiegel.de/politik/ausland/afrika-angola-geht-gegen-islam-vor-und-schliesst-moscheen-a-935788.html', 'body' => array( '//div[contains(@class, "article-section")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/stereophile.com.php000066400000000000000000000004531265625172200224600ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.stereophile.com/content/2015-rocky-mountain-audio-fest-starts-friday', 'body' => array( '//div[@class="content clear-block"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/stupidfox.net.php000066400000000000000000000005201265625172200221650ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://stupidfox.net/134-sleepy-time', 'body' => array( '//div[@class="comicmid"]/center/a/img', '//div[@class="stand_high"]', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/subtraction.com.php000066400000000000000000000006251265625172200224730ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.subtraction.com/2015/06/06/time-lapse-video-of-one-world-trade-center/', 
'body' => array('//article/div[@class="entry-content"]'), 'strip' => array(), ), ), 'filter' => array( '%.*%' => array( '%\+%' => '', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/sz.de.php000066400000000000000000000006231265625172200204020ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://sz.de/1.2443161', 'body' => array('//article[@id="sitecontent"]/section[@class="topenrichment"]//img | //article[@id="sitecontent"]/section[@class="body"]/section[@class="authors"]/preceding-sibling::*[not(contains(@class, "ad"))]'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/techcrunch.com.php000066400000000000000000000006551265625172200222670ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://techcrunch.com/2013/08/31/indias-visa-maze/', 'body' => array( '//div[contains(@class, "media-container")]', '//div[@class="body-copy"]', ), 'strip' => array( '//*[contains(@class, "module-crunchbase")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/the-ebook-reader.com.php000066400000000000000000000004621265625172200232520ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://blog.the-ebook-reader.com/2015/09/25/kobo-glo-hd-and-kobo-touch-2-0-covers-and-cases-roundup/', 'body' => array( '//div[@class="entry"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/theatlantic.com.php000066400000000000000000000014451265625172200224370ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.theatlantic.com/politics/archive/2015/09/what-does-it-mean-to-lament-the-poor-inside-panem/407317/', 'body' => array( '//picture[@class="img"]', '//figure/figcaption/span', '//div/p[@itemprop="description"]', '//div[@class="article-body"]', ), 'strip' => array( '//aside[@class="callout"]', '//span[@class="credit"]', '//figcaption[@class="credit"]', '//aside[contains(@class,"partner-box")]', '//div[contains(@class,"ad")]', '//a[contains(@class,"social-icon")]', ), ), ), ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/theawkwardyeti.com.php000066400000000000000000000001731265625172200231700ustar00rootroot00000000000000 array( '%.*%' => array( '%-150x150%' => '', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/thecodinglove.com.php000066400000000000000000000003741265625172200227710ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://thecodinglove.com/post/116897934767', 'body' => array('//div[@class="bodytype"]'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/thedoghousediaries.com.php000066400000000000000000000006661265625172200240220ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array( '//div[@class="comicpane"]/a/img', '//div[@class="entry"]', ), 'strip' => array(), 'test_url' => 'http://thedoghousediaries.com/6023', ), ), 'filter' => array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/thegamercat.com.php000066400000000000000000000004521265625172200224200ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.thegamercat.com/comic/just-no/', 'body' => array('//div[@id="comic"] | //div[@class="post-content"]/div[@class="entry"]/p'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/thehindu.com.php000066400000000000000000000012721265625172200217450ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.thehindu.com/sci-tech/science/why-is-the-shape-of-cells-in-a-honeycomb-always-hexagonal/article7692306.ece?utm_source=RSS_Feed&utm_medium=RSS&utm_campaign=RSS_Syndication', 'body' => array( '//div/img[@class="main-image"]', '//div[@class="photo-caption"]', '//div[@class="articleLead"]', '//p', '//span[@class="upper"]', ), 'strip' => array( '//div[@id="articleKeywords"]', '//div[@class="photo-source"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/thelocal.se.php000066400000000000000000000012301265625172200215530ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.thelocal.se/20151018/swedish-moderates-tighten-focus-on-begging-ban', 'body' => array( '//article', ), 'strip' => array( '//p[@id="mobile-signature"]', '//article/div[4]', '//article/ul[1]', '//div[@class="clr"]', '//p[@class="small"]', '//p[@style="font-weight: bold; font-size: 14px;"]', '//div[@class="author"]', '//div[@class="ad_container"]', ) ) ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/themerepublic.net.php000066400000000000000000000006051265625172200227740ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.themerepublic.net/2015/04/david-lopez-pitoko.html?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+blogspot%2FDngUJ+%28Theme+Republic%29&utm_content=FeedBurner', 'body' => array('//*[@class="post-body"]'), 'strip' => array(), ), ), ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/themoscowtimes.com.php000066400000000000000000000010471265625172200232070ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.themoscowtimes.com/business/article/535500.html', 'body' => array( '//div[@class="article_main_img"]', '//div[@class="article_text"]', ), 'strip' => array( '//div[@class="articlebottom"]', '//p/b', '//p/a[contains(@href, "/article.php?id=")]', '//div[@class="disqus_wrap"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/thenewslens.com.php000066400000000000000000000020061265625172200224700ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://international.thenewslens.com/post/255032/', 'body' => array( '//article/main[contains(@class, "content-post")]', ), 'strip' => array( '//div[@class="photo-credit"]', '//p[@align="center"]', '//div[@class="clearfix"]', '//div[@class="authorZone"]', '//style', '//div[@id="ttsbox"]', '//div[@id="ttscontrols"]', '//div[@class="author-info"]', '//div[contains(@id, "-ad")]', '//div[@style="font-size:small;margin:3px 0 0 0;vertical-align:top;line-height:24px;"]', '//div[contains(@class, "hidden-xs")]', '//div[contains(@class, "visible-xs")]', '//div[contains(@class, "visible-lg")]', '//a[@name="comment-panel"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/theodd1sout.com.php000066400000000000000000000001721265625172200223760ustar00rootroot00000000000000 array( '%.*%' => array( '%-150x150%' => '', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/theonion.com.php000066400000000000000000000006341265625172200217610ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.theonion.com/article/wild-eyed-jim-harbaugh-informs-players-they-must-k-51397?utm_medium=RSS&utm_campaign=feeds', 'body' => array( '//div[@class="content-masthead"]/figure/div/noscript/img', '//div[@class="content-text"]', ), ), ), ); 
picoFeed-0.1.18/lib/PicoFeed/Rules/thestandard.com.hk.php000066400000000000000000000011571265625172200230410ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.thestandard.com.hk/breaking_news_detail.asp?id=67156', 'body' => array( '//table/tr/td/span[@class="bodyCopy"]', ), 'strip' => array( '//script', '//br', '//map[@name="gif_bar"]', '//img[contains(@usemap,"gif_bar")]', '//a', '//span[@class="bodyHeadline"]', '//i', '//b', '//table', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/threepanelsoul.com.php000066400000000000000000000003741265625172200231710ustar00rootroot00000000000000 array( '%.*%' => array( 'body' => array( '//img[@id="cc-comic"]', ), 'test_url' => 'http://www.threepanelsoul.com/comic/uncloaking', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/timesofindia.indiatimes.com.php000066400000000000000000000007231265625172200247350ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://timesofindia.indiatimes.com/city/mangaluru/Adani-UPCL-to-release-CSR-grant-of-Rs-3-74-crore-to-YellurGram-Panchayat/articleshow/50512116.cms', 'body' => array( '//div[@class="article_content clearfix"]', '//section[@class="highlight clearfix"]', ), 'strip' => array( ), ), ), );picoFeed-0.1.18/lib/PicoFeed/Rules/travel-dealz.de.php000066400000000000000000000007021265625172200223360ustar00rootroot00000000000000 array( '%^/blog.*%' => array( 'test_url' => 'http://travel-dealz.de/blog/venere-gutschein/', 'body' => array('//div[@class="post-entry"]'), 'strip' => array( '//*[@id="jp-relatedposts"]', '//*[@class="post-meta"]', '//*[@class="post-data"]', '//*[@id="author-meta"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/treehugger.com.php000066400000000000000000000006131265625172200222740ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.treehugger.com/uncategorized/top-ten-posts-week-bunnies-2.html', 'body' => array( '//div[contains(@class, "promo-image")]', '//div[contains(@id, "entry-body")]', ), 
'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/treelobsters.com.php000066400000000000000000000002131265625172200226440ustar00rootroot00000000000000 array( '%.*%' => array( '%title="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/twogag.com.php000066400000000000000000000003031265625172200214170ustar00rootroot00000000000000 array( '%.*%' => array( '%http://www.twogag.com/comics-rss/([^.]+)\\.jpg%' => 'http://www.twogag.com/comics/$1.jpg', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/twokinds.keenspot.com.php000066400000000000000000000005231265625172200236240ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://twokinds.keenspot.com/archive.php?p=0', 'body' => array('//*[@class="comic"]/div/a/img | //*[@class="comic"]/div/img | //*[@id="cg_img"]/img | //*[@id="cg_img"]/a/img'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/undeadly.org.php000066400000000000000000000005551265625172200217560ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://undeadly.org/cgi?action=article&sid=20141101181155', 'body' => array( '/html/body/table[3]/tbody/tr/td[1]/table[2]/tr/td[1]', ), 'strip' => array( '//font', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/upi.com.php000066400000000000000000000007441265625172200207350ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.upi.com/Top_News/US/2015/09/26/Tech-giants-Hollywood-stars-among-guests-at-state-dinner-for-Chinas-Xi-Jinping/4541443281006/', 'body' => array( '//div[@class="img"]', '//div/article[@itemprop="articleBody"]', ), 'strip' => array( '//div[@align="center"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/version2.dk.php000066400000000000000000000005621265625172200215250ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.version2.dk/artikel/surface-pro-2-fungerer-bedre-til-arbejde-end-fornoejelse-55195', 'body' => array( '//section[contains(@class, "teaser")]', '//section[contains(@class, "body")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/vgcats.com.php000066400000000000000000000007161265625172200214260ustar00rootroot00000000000000 array( '%/comics.*%' => array( 'test_url' => 
'http://www.vgcats.com/comics/?strip_id=358', 'body' => array('//*[@align="center"]/img'), 'strip' => array(), ), '%/super.*%' => array( 'test_url' => 'http://www.vgcats.com/super/?strip_id=84', 'body' => array('//*[@align="center"]/p/img'), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/vuxml.org.php000066400000000000000000000006661265625172200213270ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.vuxml.org/freebsd/a5f160fa-deee-11e4-99f8-080027ef73ec.html', 'body' => array( '//body', ), 'strip' => array( '//h1', '//div[@class="blurb"]', '//hr', '//p[@class="copyright"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.bbc.co.uk.php000066400000000000000000000025451265625172200217530ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.bbc.co.uk/news/world-middle-east-23911833', 'body' => array( '//div[@class="story-body__inner"] | //div[@class="article"]', '//div[@class="indPost"]', ), 'strip' => array( '//form', '//div[@id="headline"]', '//*[@class="warning"]', '//span[@class="off-screen"]', '//span[@class="story-image-copyright"]', '//ul[@class="story-body__unordered-list"]', '//div[@class="ad_wrapper"]', '//div[@id="article-sidebar"]', '//div[@class="data-table-outer"]', '//*[@class="story-date"]', '//*[@class="story-header"]', '//figure[contains(@class,"has-caption")]', '//*[@class="story-related"]', '//*[contains(@class, "byline")]', '//p[contains(@class, "media-message")]', '//*[contains(@class, "story-feature")]', '//*[@id="video-carousel-container"]', '//*[@id="also-related-links"]', '//*[contains(@class, "share") or contains(@class, "hidden") or contains(@class, "hyper")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.bdgest.com.php000066400000000000000000000006661265625172200222360ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.bdgest.com/chronique-6027-BD-Adrastee-Tome-2.html', 'body' => array( '//*[contains(@class, "chronique")]', ), 
'strip' => array( '//*[contains(@class, "post-review")]', '//*[contains(@class, "footer-review")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.bgr.in.php000066400000000000000000000015021265625172200213560ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.bgr.in/news/xiaomi-redmi-3-with-13-megapixel-camera-snapdragon-616-launched-price-specifications-and-features/', 'body' => array( '//div[@class="article-content"]', ), 'strip' => array( '//*[@class="article-meta"]', '//*[@class="contentAdsense300"]', '//*[@class="iwpl-social-hide"]', '//iframe[@class="iframeads"]', '//*[@class="disqus_thread"]', '//*[@class="outb-mobile OUTBRAIN"]', '//*[@class="wdt_smart_alerts"]', '//*[@class="footnote"]', '//*[@id="gadget-widget"]', '//header[@class="article-title entry-header"]', ), ), ), );picoFeed-0.1.18/lib/PicoFeed/Rules/www.businessweek.com.php000066400000000000000000000007211265625172200234650ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.businessweek.com/articles/2013-09-18/elon-musks-hyperloop-will-work-says-some-very-smart-software', 'body' => array( '//div[@id="lead_graphic"]', '//div[@id="article_body"]', ), 'strip' => array( '//*[contains(@class, "related_item")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.cnn.com.php000066400000000000000000000020421265625172200215320ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.cnn.com/2013/08/31/world/meast/syria-civil-war/index.html?hpt=hp_t1', 'body' => array( '//div[@class="cnn_strycntntlft"]', ), 'strip' => array( '//div[@class="cnn_stryshrwdgtbtm"]', '//div[@class="cnn_strybtmcntnt"]', '//div[@class="cnn_strylftcntnt"]', '//div[contains(@class, "cnnGalleryContainer")]', '//div[contains(@class, "cnn_strylftcexpbx")]', '//div[contains(@class, "articleGalleryNavContainer")]', '//div[contains(@class, "cnnArticleGalleryCaptionControl")]', '//div[contains(@class, "cnnArticleGalleryNavPrevNextDisabled")]', 
'//div[contains(@class, "cnnArticleGalleryNavPrevNext")]', '//div[contains(@class, "cnn_html_media_title_new")]', '//div[contains(@id, "disqus")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.developpez.com.php000066400000000000000000000014101265625172200231270ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.developpez.com/actu/81757/Mozilla-annonce-la-disponibilite-de-Firefox-36-qui-passe-au-HTTP-2-et-permet-la-synchronisation-de-son-ecran-d-accueil/', 'body' => array( '//*[@itemprop="articleBody"]', ), 'strip' => array( '//form', '//div[@class="content"]/img', '//a[last()]/following-sibling::*', '//*[contains(@class,"actuTitle")]', '//*[contains(@class,"date")]', '//*[contains(@class,"inlineimg")]', '//*[@id="signaler"]', '//*[@id="signalerFrame"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.egscomics.com.php000066400000000000000000000004401265625172200227300ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.egscomics.com/index.php?id=1690', 'title' => '/html/head/title', 'body' => array( '//img[@id="comic"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.fakingnews.firstpost.com.php000066400000000000000000000011021265625172200251400ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.fakingnews.firstpost.com/2016/01/engineering-student-creates-record-in-a-decade-becomes-the-first-to-completely-exhaust-ball-pen-refill/', 'body' => array( '//div[@class="entry"]', ), 'strip' => array( '//*[@class="socialshare_bar"]', '//*[@class="authorbox"]', '//*[@class="cf5_rps"]', '//*[@class="60563 fb-comments fb-social-plugin"]', ), ), ), );picoFeed-0.1.18/lib/PicoFeed/Rules/www.forbes.com.php000066400000000000000000000014411265625172200222360ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.forbes.com/sites/andygreenberg/2013/09/05/follow-the-bitcoins-how-we-got-busted-buying-drugs-on-silk-roads-black-market/', 'body' => array( 
'//div[@id="leftRail"]/div[contains(@class, body)]', ), 'strip' => array( '//aside', '//div[contains(@class, "entity_block")]', '//div[contains(@class, "vestpocket") and not contains(@class, "body")]', '//div[contains(@style, "display")]', '//div[contains(@id, "comment")]', '//div[contains(@class, "widget")]', '//div[contains(@class, "pagination")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.franceculture.fr.php000066400000000000000000000006061265625172200234530ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.franceculture.fr/emission-culture-eco-la-finance-aime-toujours-la-france-2016-01-08', 'body' => array( '//div[@class="listen"]', '//div[@class="field-items"]', ), 'strip' => array( ), ) ) ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.futura-sciences.com.php000066400000000000000000000012671265625172200240640ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.futura-sciences.com/magazines/espace/infos/actu/d/astronautique-curiosity-franchi-succes-dune-dingo-gap-52289/#xtor=RSS-8', 'body' => array( '//div[contains(@class, "content fiche-")]', ), 'strip' => array( '//h1', '//*[contains(@class, "content-date")]', '//*[contains(@class, "diaporama")]', '//*[contains(@class, "slider")]', '//*[contains(@class, "cartouche")]', '//*[contains(@class, "noprint")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.geekculture.com.php000066400000000000000000000004721265625172200233000ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.geekculture.com/joyoftech/joyarchives/2180.html', 'body' => array( '//p[contains(@class,"Maintext")][2]/img', ), 'strip' => array(), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.howtogeek.com.php000066400000000000000000000005671265625172200227620ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.howtogeek.com/235283/what-is-a-wireless-hard-drive-and-should-i-get-one/', 'body' => array( '//div[@class="thecontent"]', ), 
'strip' => array( '//*[@class="relatedside"]', ), ), ), );picoFeed-0.1.18/lib/PicoFeed/Rules/www.lepoint.fr.php000066400000000000000000000013021265625172200222550ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.lepoint.fr/c-est-arrive-aujourd-hui/19-septembre-1783-pour-la-premiere-fois-un-mouton-un-canard-et-un-coq-s-envoient-en-l-air-devant-louis-xvi-18-09-2012-1507704_494.php', 'body' => array( '//article', ), 'strip' => array( '//*[contains(@class, "info_article")]', '//*[contains(@class, "fildariane_titre")]', '//*[contains(@class, "entete2_article")]', '//*[contains(@class, "signature_article")]', '//*[contains(@id, "share")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.lesnumeriques.com.php000066400000000000000000000016721265625172200236650ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.lesnumeriques.com/blender/kitchenaid-diamond-5ksb1585-p27473/test.html', 'body' => array( '//*[@id="product-content"]', '//*[@id="news-content"]', '//*[@id="article-content"]', ), 'strip' => array( '//form', '//div[contains(@class, "price-v4"])', '//div[contains(@class, "authors-and-date")]', '//div[contains(@class, "mini-product")]', '//div[@id="articles-related-authors"]', '//div[@id="tags-socials"]', '//div[@id="user-reviews"]', '//div[@id="product-reviews"]', '//div[@id="publication-breadcrumbs-and-date"]', '//div[@id="publication-breadcrumbs-and-date"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.mac4ever.com.php000066400000000000000000000005721265625172200224700ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.mac4ever.com/actu/87392_video-quand-steve-jobs-et-bill-gates-jouaient-au-bachelor-avec-le-mac', 'body' => array( '//div[contains(@class, "news-news-content")]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.makeuseof.com.php000066400000000000000000000010421265625172200227320ustar00rootroot00000000000000 array( '%.*%' => array( 
'test_url' => 'http://www.makeuseof.com/tag/having-problems-with-audio-in-windows-10-heres-a-likely-fix/', 'body' => array( '//div[@class="entry"]', ), 'strip' => array( '//*[@class="new_sharebar"]', '//*[@class="author"]', '//*[@class="wdt_grouvi"]', '//*[@class="wdt_smart_alerts"]', '//*[@class="modal fade grouvi"]', ), ), ), );picoFeed-0.1.18/lib/PicoFeed/Rules/www.npr.org.php000066400000000000000000000012671265625172200215740ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.npr.org/blogs/thesalt/2013/09/17/223345977/auto-brewery-syndrome-apparently-you-can-make-beer-in-your-gut', 'body' => array( '//div[@id="storytext"]', ), 'strip' => array( '//*[@class="bucket img"]', '//*[@class="creditwrap"]', '//div[@class="credit-caption"]', '//*[@class="credit"]', '//*[@class="captionwrap"]', '//*[@class="toggle-caption"]', '//*[contains(@class, "enlargebtn")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.numerama.com.php000066400000000000000000000006331265625172200225650ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.numerama.com/sciences/125959-recherches-ladn-recompensees-nobel-de-chimie.html', 'body' => array( '//article', ), 'strip' => array( '//footer', '//section[@class="related-article"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.oneindia.com.php000066400000000000000000000006301265625172200225430ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.oneindia.com/india/b-luru-govt-likely-remove-word-eunuch-from-sec-36-a-karnataka-police-act-1981173.html', 'body' => array( '//div[@class="ecom-ad-content"]', ), 'strip' => array( '//*[@id="view_cmtns"]', ), ), ), );picoFeed-0.1.18/lib/PicoFeed/Rules/www.pcinpact.com.php000066400000000000000000000006641265625172200225650ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 
'http://www.pcinpact.com/news/85954-air-france-ne-vous-demande-plus-deteindre-vos-appareils-electroniques.htm?utm_source=PCi_RSS_Feed&utm_medium=news&utm_campaign=pcinpact', 'body' => array( '//div[contains(@id, "actu_content")]', ), 'strip' => array( ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.pseudo-sciences.org.php000066400000000000000000000006521265625172200240630ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.pseudo-sciences.org/spip.php?article2275', 'body' => array( '//div[@id="art_main"]', ), 'strip' => array( '//div[@id="art_print"]', '//div[@id="art_chapo"]', '//img[@class="puce"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.slate.fr.php000066400000000000000000000013711265625172200217210ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.slate.fr/monde/77034/allemagne-2013-couacs-campagne', 'body' => array( '//div[@class="article_content"]', ), 'strip' => array( '//*[@id="slate_associated_bn"]', '//*[@id="ligatus-article"]', '//*[@id="article_sidebar"]', '//div[contains(@id, "reseaux")]', '//*[contains(@class, "smart") or contains(@class, "article_tags") or contains(@class, "article_reactions")]', '//*[contains(@class, "OUTBRAIN") or contains(@class, "related_item") or contains(@class, "share")]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.universfreebox.com.php000066400000000000000000000007041265625172200240250ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://www.universfreebox.com/article/24305/4G-Bouygues-Telecom-lance-une-vente-flash-sur-son-forfait-Sensation-3Go', 'body' => array( '//div[@id="corps_corps"]', ), 'strip' => array( '//*[@id="formulaire"]', '//*[@id="commentaire"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/www.zeit.de.php000066400000000000000000000031251265625172200215440ustar00rootroot00000000000000 array( '%^/zeit-magazin.*%' => array( 'test_url' => 
'http://www.zeit.de/zeit-magazin/2015/15/pegida-kathrin-oertel-lutz-bachmann', 'body' => array( '//article[@class="article"]', ), 'strip' => array( '//header/div/h1', '//header/div/div[@class="article__head__subtitle"]', '//header/div/div[@class="article__column__author"]', '//header/div/div[@class="article__column__author"]', '//header/div/span[@class="article__head__meta-wrap"]', '//form', '//style', '//div[contains(@class, "ad-tile")]', '//div[@class="iqd-mobile-adplace"]', '//div[@id="iq-artikelanker"]', '//div[@id="js-social-services"]', '//section[@id="js-comments"]', '//aside', ), ), '%.*%' => array( 'test_url' => 'http://www.zeit.de/politik/ausland/2015-04/thessaloniki-krise-griechenland-yannis-boutaris/', 'body' => array( '//div[@class="article-body"]', ), 'strip' => array( '//*[@class="articleheader"]', '//*[@class="excerpt"]', '//div[contains(@class, "ad")]', '//div[@itemprop="video"]', '//*[@class="articlemeta"]', '//*[@class="articlemeta-clear"]', '//*[@class="zol_inarticletools"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/xkcd.com.php000066400000000000000000000002111265625172200210560ustar00rootroot00000000000000 array( '%.*%' => array( '%alt="(.+)" */>%' => '/>
$1', ), ), ); picoFeed-0.1.18/lib/PicoFeed/Rules/zdnet.com.php000066400000000000000000000016201265625172200212560ustar00rootroot00000000000000 array( '%.*%' => array( 'test_url' => 'http://zdnet.com.feedsportal.com/c/35462/f/675637/s/4a33c93e/sc/11/l/0L0Szdnet0N0Carticle0Cchina0Eus0Eagree0Eon0Ecybercrime0Ecooperation0Eamid0Econtinued0Etension0C0Tftag0FRSSbaffb68/story01.htm', 'body' => array( '//p[@class="summary"]', '//div[contains(@class,"storyBody")]', ), 'strip' => array( '//*[contains(@class,"ad-")]', '//p/span', '//script', '//p[@class="summary"]', '//div[contains(@class,"relatedContent")]', '//div[contains(@class,"loader")]', '//p[@class="photoDetails"]', '//div[@class="thumbnailSlider"]', '//div[@class="shortcodeGalleryWrapper"]', ), ), ), ); picoFeed-0.1.18/lib/PicoFeed/Scraper/000077500000000000000000000000001265625172200171525ustar00rootroot00000000000000picoFeed-0.1.18/lib/PicoFeed/Scraper/CandidateParser.php000066400000000000000000000143351265625172200227220ustar00rootroot00000000000000dom = XmlParser::getHtmlDocument(''.$html); $this->xpath = new DOMXPath($this->dom); } /** * Get the relevant content with the list of potential attributes. * * @return string */ public function execute() { $content = $this->findContentWithCandidates(); if (strlen($content) < 200) { $content = $this->findContentWithArticle(); } if (strlen($content) < 50) { $content = $this->findContentWithBody(); } return $this->stripGarbage($content); } /** * Find content based on the list of tag candidates. 
* * @return string */ public function findContentWithCandidates() { foreach ($this->candidatesAttributes as $candidate) { Logger::setMessage(get_called_class().': Try this candidate: "'.$candidate.'"'); $nodes = $this->xpath->query('//*[(contains(@class, "'.$candidate.'") or @id="'.$candidate.'") and not (contains(@class, "nav") or contains(@class, "page"))]'); if ($nodes !== false && $nodes->length > 0) { Logger::setMessage(get_called_class().': Find candidate "'.$candidate.'"'); return $this->dom->saveXML($nodes->item(0)); } } return ''; } /** * Find
tag. * * @return string */ public function findContentWithArticle() { $nodes = $this->xpath->query('//article'); if ($nodes !== false && $nodes->length > 0) { Logger::setMessage(get_called_class().': Find
tag'); return $this->dom->saveXML($nodes->item(0)); } return ''; } /** * Find tag. * * @return string */ public function findContentWithBody() { $nodes = $this->xpath->query('//body'); if ($nodes !== false && $nodes->length > 0) { Logger::setMessage(get_called_class().' Find '); return $this->dom->saveXML($nodes->item(0)); } return ''; } /** * Strip useless tags. * * @param string $content * * @return string */ public function stripGarbage($content) { $dom = XmlParser::getDomDocument($content); if ($dom !== false) { $xpath = new DOMXPath($dom); $this->stripTags($xpath); $this->stripAttributes($dom, $xpath); $content = $dom->saveXML($dom->documentElement); } return $content; } /** * Remove blacklisted tags. * * @param DOMXPath $xpath */ public function stripTags(DOMXPath $xpath) { foreach ($this->stripTags as $tag) { $nodes = $xpath->query('//'.$tag); if ($nodes !== false && $nodes->length > 0) { Logger::setMessage(get_called_class().': Strip tag: "'.$tag.'"'); foreach ($nodes as $node) { $node->parentNode->removeChild($node); } } } } /** * Remove blacklisted attributes. * * @param DomDocument $dom * @param DOMXPath $xpath */ public function stripAttributes(DomDocument $dom, DOMXPath $xpath) { foreach ($this->stripAttributes as $attribute) { $nodes = $xpath->query('//*[contains(@class, "'.$attribute.'") or contains(@id, "'.$attribute.'")]'); if ($nodes !== false && $nodes->length > 0) { Logger::setMessage(get_called_class().': Strip attribute: "'.$attribute.'"'); foreach ($nodes as $node) { if ($this->shouldRemove($dom, $node)) { $node->parentNode->removeChild($node); } } } } } /** * Return false if the node should not be removed. 
* * @param DomDocument $dom * @param DomNode $node * * @return bool */ public function shouldRemove(DomDocument $dom, $node) { $document_length = strlen($dom->textContent); $node_length = strlen($node->textContent); if ($document_length === 0) { return true; } $ratio = $node_length * 100 / $document_length; if ($ratio >= 90) { Logger::setMessage(get_called_class().': Should not remove this node ('.$node->nodeName.') ratio: '.$ratio.'%'); return false; } return true; } } picoFeed-0.1.18/lib/PicoFeed/Scraper/ParserInterface.php000066400000000000000000000003011265625172200227320ustar00rootroot00000000000000config = $config; } /** * Get the rules for an URL. * * @param string $url the URL that should be looked up * * @return array the array containing the rules */ public function getRules($url) { $hostname = parse_url($url, PHP_URL_HOST); if ($hostname !== false) { $files = $this->getRulesFileList($hostname); foreach ($this->getRulesFolders() as $folder) { $rule = $this->loadRuleFile($folder, $files); if (!empty($rule)) { return $rule; } } } return array(); } /** * Get the list of possible rules file names for a given hostname. * * @param string $hostname Hostname * * @return array */ public function getRulesFileList($hostname) { $files = array($hostname); // subdomain.domain.tld $parts = explode('.', $hostname); $len = count($parts); if ($len > 2) { $subdomain = array_shift($parts); $files[] = implode('.', $parts); // domain.tld $files[] = '.'.implode('.', $parts); // .domain.tld $files[] = $subdomain; // subdomain } elseif ($len === 2) { $files[] = '.'.implode('.', $parts); // .domain.tld $files[] = $parts[0]; // domain } return $files; } /** * Load a rule file from the defined folder. 
* * @param string $folder Rule directory * @param array $files List of possible file names * * @return array */ public function loadRuleFile($folder, array $files) { foreach ($files as $file) { $filename = $folder.'/'.$file.'.php'; if (file_exists($filename)) { Logger::setMessage(get_called_class().' Load rule: '.$file); return include $filename; } } return array(); } /** * Get the list of folders that contains rules. * * @return array */ public function getRulesFolders() { $folders = array(__DIR__.'/../Rules'); if ($this->config !== null && $this->config->getGrabberRulesFolder() !== null) { $folders[] = $this->config->getGrabberRulesFolder(); } return $folders; } } picoFeed-0.1.18/lib/PicoFeed/Scraper/RuleParser.php000066400000000000000000000035611265625172200217540ustar00rootroot00000000000000rules = $rules; $this->dom = XmlParser::getHtmlDocument(''.$html); $this->xpath = new DOMXPath($this->dom); } /** * Get the relevant content with predefined rules. * * @return string */ public function execute() { $this->stripTags(); return $this->findContent(); } /** * Remove HTML tags. */ public function stripTags() { if (isset($this->rules['strip']) && is_array($this->rules['strip'])) { foreach ($this->rules['strip'] as $pattern) { $nodes = $this->xpath->query($pattern); if ($nodes !== false && $nodes->length > 0) { foreach ($nodes as $node) { $node->parentNode->removeChild($node); } } } } } /** * Fetch content based on Xpath rules. 
*/ public function findContent() { $content = ''; if (isset($this->rules['body']) && is_array($this->rules['body'])) { foreach ($this->rules['body'] as $pattern) { $nodes = $this->xpath->query($pattern); if ($nodes !== false && $nodes->length > 0) { foreach ($nodes as $node) { $content .= $this->dom->saveXML($node); } } } } return $content; } } picoFeed-0.1.18/lib/PicoFeed/Scraper/Scraper.php000066400000000000000000000157711265625172200212750ustar00rootroot00000000000000config = $config; Logger::setTimezone($this->config->getTimezone()); } /** * Disable candidates parsing. * * @return Scraper */ public function disableCandidateParser() { $this->enableCandidateParser = false; return $this; } /** * Get encoding. * * @return string */ public function getEncoding() { return $this->encoding; } /** * Set encoding. * * @param string $encoding * * @return Scraper */ public function setEncoding($encoding) { $this->encoding = $encoding; return $this; } /** * Get URL to download. * * @return string */ public function getUrl() { return $this->url; } /** * Set URL to download. * * @param string $url URL * * @return Scraper */ public function setUrl($url) { $this->url = $url; return $this; } /** * Return true if the scraper found relevant content. * * @return bool */ public function hasRelevantContent() { return !empty($this->content); } /** * Get relevant content. * * @return string */ public function getRelevantContent() { return $this->content; } /** * Get raw content (unfiltered). * * @return string */ public function getRawContent() { return $this->html; } /** * Set raw content (unfiltered). * * @param string $html * * @return Scraper */ public function setRawContent($html) { $this->html = $html; return $this; } /** * Get filtered relevant content. * * @return string */ public function getFilteredContent() { $filter = Filter::html($this->content, $this->url); $filter->setConfig($this->config); return $filter->execute(); } /** * Download the HTML content. 
*
     * Resets the html, content and encoding properties, then fetches the URL
     * with the HTTP client. On success the url, html and encoding properties
     * are refreshed from the client. A ClientException is logged and reported
     * as a failure.
     *
     * @return bool True when the request completed, false when the URL is empty
     *              or a ClientException was caught
     */
    public function download()
    {
        // Nothing to fetch without a URL.
        if (empty($this->url)) {
            return false;
        }

        // Clear everything
        $this->html = '';
        $this->content = '';
        $this->encoding = '';

        try {
            $http = Client::getInstance();
            $http->setConfig($this->config);
            $http->setTimeout($this->config->getGrabberTimeout());
            $http->setUserAgent($this->config->getGrabberUserAgent());
            $http->execute($this->url);

            $this->url = $http->getUrl();
            $this->html = $http->getContent();
            $this->encoding = $http->getEncoding();
        } catch (ClientException $failure) {
            Logger::setMessage(get_called_class().': '.$failure->getMessage());

            return false;
        }

        return true;
    }

    /**
     * Execute the scraper.
     *
     * Downloads the page, then, unless processing must be skipped, normalizes
     * the HTML and stores the output of the selected parser as content.
     */
    public function execute()
    {
        $this->download();

        if ($this->skipProcessing()) {
            return;
        }

        $this->prepareHtml();

        $parser = $this->getParser();

        // No parser available: leave the content as it is.
        if ($parser === null) {
            return;
        }

        $this->content = $parser->execute();
        Logger::setMessage(get_called_class().': Content length: '.strlen($this->content).' bytes');
    }

    /**
     * Returns true if the parsing must be skipped.
     *
     * Parsing is skipped as soon as one detector reports a match, or when the
     * raw HTML is empty.
     *
     * @return bool
     */
    public function skipProcessing()
    {
        foreach (array('detectStreamingVideos', 'detectPdfFiles') as $detector) {
            if ($this->$detector()) {
                return true;
            }
        }

        if (!empty($this->html)) {
            return false;
        }

        Logger::setMessage(get_called_class().': Raw HTML is empty');

        return true;
    }

    /**
     * Get the parser.
*
     * When grabber rules exist for the URL, the first rule whose pattern
     * matches the URL's full path yields a RuleParser; if none matches, no
     * parser is returned. Without grabber rules, a CandidateParser is returned
     * when candidate parsing is enabled.
     *
     * @return ParserInterface|null
     */
    public function getParser()
    {
        $loader = new RuleLoader($this->config);
        $rules = $loader->getRules($this->url);

        if (!empty($rules['grabber'])) {
            Logger::setMessage(get_called_class().': Parse content with rules');

            foreach ($rules['grabber'] as $pattern => $rule) {
                $parsedUrl = new Url($this->url);
                $path = $parsedUrl->getFullPath();

                if (!preg_match($pattern, $path)) {
                    continue;
                }

                Logger::setMessage(get_called_class().': Matched url '.$path);

                return new RuleParser($this->html, $rule);
            }

            // Rules exist but none matched: the candidate parser is not used.
            return;
        }

        if ($this->enableCandidateParser) {
            Logger::setMessage(get_called_class().': Parse content with candidates');

            return new CandidateParser($this->html);
        }

        return;
    }

    /**
     * Normalize encoding and strip head tag.
     *
     * The encoding found in the HTML meta tag takes precedence over the one
     * stored in the encoding property.
     */
    public function prepareHtml()
    {
        $html_encoding = XmlParser::getEncodingFromMetaTag($this->html);

        if ($html_encoding) {
            $source_encoding = $html_encoding;
        } else {
            $source_encoding = $this->encoding;
        }

        $this->html = Encoding::convert($this->html, $source_encoding);
        $this->html = Filter::stripHeadTags($this->html);

        Logger::setMessage(get_called_class().': HTTP Encoding "'.$this->encoding.'" ; HTML Encoding "'.$html_encoding.'"');
    }

    /**
     * Return the Youtube embed player and skip processing.
     *
     * NOTE(review): the content is set to an empty string here and $matches is
     * never used; the embed markup may be missing from this copy - confirm.
     *
     * @return bool True when the URL contains a Youtube video id
     */
    public function detectStreamingVideos()
    {
        if (!preg_match("#(?<=v=|v\/|vi=|vi\/|youtu.be\/)[a-zA-Z0-9_-]{11}#", $this->url, $matches)) {
            return false;
        }

        $this->content = '';

        return true;
    }

    /**
     * Skip processing for PDF documents.
     *
     * @return bool True when the last three characters of the URL are "pdf"
     */
    public function detectPdfFiles()
    {
        $suffix = substr($this->url, -3);

        return $suffix === 'pdf';
    }
}
picoFeed-0.1.18/lib/PicoFeed/Serialization/000077500000000000000000000000001265625172200203705ustar00rootroot00000000000000picoFeed-0.1.18/lib/PicoFeed/Serialization/Export.php000066400000000000000000000057761265625172200224010ustar00rootroot00000000000000content = $content; }

    /**
     * Get the OPML document.
*
     * String keys of the content array are exported as categories holding a
     * list of feeds; other keys are exported as single feed entries.
     *
     * NOTE(review): the root document literal is an empty string here; the
     * OPML root markup may be missing from this copy - confirm.
     *
     * @return string
     */
    public function execute()
    {
        $document = new SimpleXMLElement('');

        $head = $document->addChild('head');
        $head->addChild('title', 'OPML Export');

        $body = $document->addChild('body');

        foreach ($this->content as $key => $value) {
            if (!is_string($key)) {
                $this->createEntry($body, $value);
                continue;
            }

            $this->createCategory($body, $key, $value);
        }

        return $document->asXML();
    }

    /**
     * Create a feed entry.
     *
     * Nothing is written when one of the required fields is missing.
     *
     * @param SimpleXMLElement $parent Parent Element
     * @param array            $feed   Feed properties
     */
    public function createEntry(SimpleXMLElement $parent, array $feed)
    {
        foreach ($this->required_fields as $required) {
            if (!isset($feed[$required])) {
                return;
            }
        }

        // The title doubles as description when none is provided.
        if (isset($feed['description'])) {
            $description = $feed['description'];
        } else {
            $description = $feed['title'];
        }

        $outline = $parent->addChild('outline');
        $outline->addAttribute('xmlUrl', $feed['feed_url']);
        $outline->addAttribute('htmlUrl', $feed['site_url']);
        $outline->addAttribute('title', $feed['title']);
        $outline->addAttribute('text', $feed['title']);
        $outline->addAttribute('description', $description);
        $outline->addAttribute('type', 'rss');
        $outline->addAttribute('version', 'RSS');
    }

    /**
     * Create entries for a feed list.
     *
     * @param SimpleXMLElement $parent Parent Element
     * @param array            $feeds  Feed list
     */
    public function createEntries(SimpleXMLElement $parent, array $feeds)
    {
        foreach ($feeds as $properties) {
            $this->createEntry($parent, $properties);
        }
    }

    /**
     * Create a category entry.
     *
     * @param SimpleXMLElement $parent   Parent Element
     * @param string           $category Category
     * @param array            $feeds    Feed properties
     */
    public function createCategory(SimpleXMLElement $parent, $category, array $feeds)
    {
        $group = $parent->addChild('outline');
        $group->addAttribute('text', $category);

        $this->createEntries($group, $feeds);
    }
}
picoFeed-0.1.18/lib/PicoFeed/Serialization/Import.php000066400000000000000000000076671265625172200223730ustar00rootroot00000000000000content = $content; }

    /**
     * Parse the OPML file.
*
     * The document must parse, have an "opml" root element and contain a
     * "body" element; otherwise the failure is logged and false is returned.
     *
     * @return array|false List of subscriptions, or false on invalid input
     */
    public function execute()
    {
        Logger::setMessage(get_called_class().': start importation');

        $document = XmlParser::getSimpleXml(trim($this->content));
        $isOpml = $document !== false && $document->getName() === 'opml' && isset($document->body);

        if (!$isOpml) {
            Logger::setMessage(get_called_class().': OPML tag not found or malformed XML document');

            return false;
        }

        $this->parseEntries($document->body);

        Logger::setMessage(get_called_class().': '.count($this->items).' subscriptions found');

        return $this->items;
    }

    /**
     * Parse each entries of the subscription list.
     *
     * Outlines that contain nested outlines are walked recursively. A leaf
     * outline becomes a subscription when it has an "xmlUrl" attribute and a
     * "text" or "title" attribute.
     *
     * @param SimpleXMLElement $tree XML node
     */
    public function parseEntries(SimpleXMLElement $tree)
    {
        if (!isset($tree->outline)) {
            return;
        }

        foreach ($tree->outline as $node) {
            if (isset($node->outline)) {
                $this->parseEntries($node);
                continue;
            }

            $hasLabel = isset($node['text']) || isset($node['title']);

            if (!$hasLabel || !isset($node['xmlUrl'])) {
                continue;
            }

            // Assignment order matters: site_url and description fall back
            // on values already stored in the entry.
            $entry = new StdClass();
            $entry->category = $this->findCategory($tree);
            $entry->title = $this->findTitle($node);
            $entry->feed_url = $this->findFeedUrl($node);
            $entry->site_url = $this->findSiteUrl($node, $entry);
            $entry->type = $this->findType($node);
            $entry->description = $this->findDescription($node, $entry);

            $this->items[] = $entry;
        }
    }

    /**
     * Find category.
     *
     * @param SimpleXmlElement $tree XML tree
     *
     * @return string The "title" attribute when present, the "text" attribute otherwise
     */
    public function findCategory(SimpleXmlElement $tree)
    {
        if (isset($tree['title'])) {
            return (string) $tree['title'];
        }

        return (string) $tree['text'];
    }

    /**
     * Find title.
     *
     * @param SimpleXmlElement $item XML tree
     *
     * @return string The "title" attribute when present, the "text" attribute otherwise
     */
    public function findTitle(SimpleXmlElement $item)
    {
        if (isset($item['title'])) {
            return (string) $item['title'];
        }

        return (string) $item['text'];
    }

    /**
     * Find feed url.
     *
     * @param SimpleXmlElement $item XML tree
     *
     * @return string
     */
    public function findFeedUrl(SimpleXmlElement $item)
    {
        $feedUrl = (string) $item['xmlUrl'];

        return $feedUrl;
    }

    /**
     * Find site url.
*
     * @param SimpleXmlElement $item  XML tree
     * @param StdClass         $entry Feed entry
     *
     * @return string The "htmlUrl" attribute when present, the entry feed URL otherwise
     */
    public function findSiteUrl(SimpleXmlElement $item, StdClass $entry)
    {
        return isset($item['htmlUrl']) ? (string) $item['htmlUrl'] : $entry->feed_url;
    }

    /**
     * Find type.
     *
     * Resolution order: the "version" attribute, then the "type" attribute,
     * then the literal "rss".
     *
     * Written as explicit branches because an unparenthesized nested ternary
     * is evaluated left to right before PHP 8 (the "version" value was only
     * used as a condition and never returned) and is rejected by PHP 8.
     *
     * @param SimpleXmlElement $item XML tree
     *
     * @return string
     */
    public function findType(SimpleXmlElement $item)
    {
        if (isset($item['version'])) {
            return (string) $item['version'];
        }

        if (isset($item['type'])) {
            return (string) $item['type'];
        }

        return 'rss';
    }

    /**
     * Find description.
     *
     * @param SimpleXmlElement $item  XML tree
     * @param StdClass         $entry Feed entry
     *
     * @return string The "description" attribute when present, the entry title otherwise
     */
    public function findDescription(SimpleXmlElement $item, StdClass $entry)
    {
        return isset($item['description']) ? (string) $item['description'] : $entry->title;
    }
}
picoFeed-0.1.18/lib/PicoFeed/Syndication/000077500000000000000000000000001265625172200200375ustar00rootroot00000000000000picoFeed-0.1.18/lib/PicoFeed/Syndication/Atom.php000066400000000000000000000140051265625172200214500ustar00rootroot00000000000000checkRequiredProperties($this->required_feed_properties, $this); $this->dom = new DomDocument('1.0', 'UTF-8'); $this->dom->formatOutput = true; // $feed = $this->dom->createElement('feed'); $feed->setAttributeNodeNS(new DomAttr('xmlns', 'http://www.w3.org/2005/Atom')); // $generator = $this->dom->createElement('generator', 'PicoFeed'); $generator->setAttribute('uri', 'https://github.com/fguillot/picoFeed'); $feed->appendChild($generator); // $title = $this->dom->createElement('title'); $title->appendChild($this->dom->createTextNode($this->title)); $feed->appendChild($title); // <id/> $id = $this->dom->createElement('id'); $id->appendChild($this->dom->createTextNode($this->site_url)); $feed->appendChild($id); // <updated/> $this->addUpdated($feed, $this->updated); // <link rel="alternate" type="text/html" href="http://example.org/"/> $this->addLink($feed, $this->site_url); // <link rel="self" type="application/atom+xml"
// href="http://example.org/feed.atom"/>
        $this->addLink($feed, $this->feed_url, 'self', 'application/atom+xml');

        // <author/>
        if (isset($this->author)) {
            $this->addAuthor($feed, $this->author);
        }

        // <entry/>
        foreach ($this->items as $item) {
            $this->checkRequiredProperties($this->required_item_properties, $item);
            $feed->appendChild($this->createEntry($item));
        }

        $this->dom->appendChild($feed);

        if ($filename) {
            $this->dom->save($filename);
        } else {
            return $this->dom->saveXML();
        }
    }

    /**
     * Create item entry.
     *
     * Builds an "entry" element with, in order: title, id, updated, published
     * (optional), alternate link, summary (optional), content (optional) and
     * author (optional).
     *
     * @param array $item Item properties
     *
     * @return DomElement
     */
    public function createEntry(array $item)
    {
        $entry = $this->dom->createElement('entry');

        // <title/>
        $titleNode = $this->dom->createElement('title');
        $titleNode->appendChild($this->dom->createTextNode($item['title']));
        $entry->appendChild($titleNode);

        // <id/> falls back on the item URL
        if (isset($item['id'])) {
            $identifier = $item['id'];
        } else {
            $identifier = $item['url'];
        }

        $idNode = $this->dom->createElement('id');
        $idNode->appendChild($this->dom->createTextNode($identifier));
        $entry->appendChild($idNode);

        // <updated/>
        if (isset($item['updated'])) {
            $updated = $item['updated'];
        } else {
            $updated = '';
        }

        $this->addUpdated($entry, $updated);

        // <published/>
        if (isset($item['published'])) {
            $published = date(DATE_ATOM, $item['published']);
            $entry->appendChild($this->dom->createElement('published', $published));
        }

        // <link rel="alternate" type="text/html" href="http://example.org/"/>
        $this->addLink($entry, $item['url']);

        // <summary/>
        if (isset($item['summary'])) {
            $summaryNode = $this->dom->createElement('summary');
            $summaryNode->appendChild($this->dom->createTextNode($item['summary']));
            $entry->appendChild($summaryNode);
        }

        // <content/>
        if (isset($item['content'])) {
            $contentNode = $this->dom->createElement('content');
            $contentNode->setAttribute('type', 'html');
            $contentNode->appendChild($this->dom->createCDATASection($item['content']));
            $entry->appendChild($contentNode);
        }

        // <author/>
        if (isset($item['author'])) {
            $this->addAuthor($entry, $item['author']);
        }

        return $entry;
    }

    /**
     * Add Link.
*
     * Appends a "link" element carrying the rel, type and href attributes.
     *
     * @param DomElement $xml  XML node
     * @param string     $url  URL
     * @param string     $rel  Link rel attribute
     * @param string     $type Link type attribute
     */
    public function addLink(DomElement $xml, $url, $rel = 'alternate', $type = 'text/html')
    {
        $node = $this->dom->createElement('link');

        $attributes = array('rel' => $rel, 'type' => $type, 'href' => $url);

        foreach ($attributes as $attribute => $content) {
            $node->setAttribute($attribute, $content);
        }

        $xml->appendChild($node);
    }

    /**
     * Add publication date.
     *
     * @param DomElement $xml   XML node
     * @param int        $value Timestamp, the current time is used when empty
     */
    public function addUpdated(DomElement $xml, $value = 0)
    {
        $timestamp = $value ?: time();
        $node = $this->dom->createElement('updated', date(DATE_ATOM, $timestamp));

        $xml->appendChild($node);
    }

    /**
     * Add author.
     *
     * The "name", "email" and "url" values, when set, become the "name",
     * "email" and "uri" children of the "author" element, in that order.
     *
     * @param DomElement $xml    XML node
     * @param array      $values Author name and email
     */
    public function addAuthor(DomElement $xml, array $values)
    {
        $author = $this->dom->createElement('author');

        // Key in $values => element name in the document
        $fields = array('name' => 'name', 'email' => 'email', 'url' => 'uri');

        foreach ($fields as $key => $tag) {
            if (!isset($values[$key])) {
                continue;
            }

            $child = $this->dom->createElement($tag);
            $child->appendChild($this->dom->createTextNode($values[$key]));
            $author->appendChild($child);
        }

        $xml->appendChild($author);
    }
}
���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������picoFeed-0.1.18/lib/PicoFeed/Syndication/Rss20.php��������������������������������������������������0000664�0000000�0000000�00000013514�12656251722�0021465�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?php namespace PicoFeed\Syndication; use DomDocument; use DomAttr; use DomElement; /** * Rss 2.0 writer class. * * @author Frederic Guillot */ class Rss20 extends Writer { /** * List of required properties for each feed. * * @var array */ private $required_feed_properties = array( 'title', 'site_url', 'feed_url', ); /** * List of required properties for each item. * * @var array */ private $required_item_properties = array( 'title', 'url', ); /** * Get the Rss 2.0 document. 
*
     * Checks the required feed properties, builds the "rss" document with one
     * "channel" and one "item" per entry, then either saves it to the given
     * file or returns the XML.
     *
     * @param string $filename Optional filename
     *
     * @return string|null XML document when no filename is given, nothing otherwise
     */
    public function execute($filename = '')
    {
        $this->checkRequiredProperties($this->required_feed_properties, $this);

        $this->dom = new DomDocument('1.0', 'UTF-8');
        $this->dom->formatOutput = true;

        // <rss/>
        $root = $this->dom->createElement('rss');
        $root->setAttribute('version', '2.0');
        $root->setAttributeNodeNS(new DomAttr('xmlns:content', 'http://purl.org/rss/1.0/modules/content/'));
        $root->setAttributeNodeNS(new DomAttr('xmlns:atom', 'http://www.w3.org/2005/Atom'));

        $channel = $this->dom->createElement('channel');

        // <generator/>
        $channel->appendChild(
            $this->dom->createElement('generator', 'PicoFeed (https://github.com/fguillot/picoFeed)')
        );

        // <title/>
        $titleNode = $this->dom->createElement('title');
        $titleNode->appendChild($this->dom->createTextNode($this->title));
        $channel->appendChild($titleNode);

        // <description/> falls back on the feed title
        if ($this->description) {
            $summary = $this->description;
        } else {
            $summary = $this->title;
        }

        $descriptionNode = $this->dom->createElement('description');
        $descriptionNode->appendChild($this->dom->createTextNode($summary));
        $channel->appendChild($descriptionNode);

        // <pubDate/>
        $this->addPubDate($channel, $this->updated);

        // <atom:link/>
        $selfLink = $this->dom->createElement('atom:link');
        $selfLink->setAttribute('href', $this->feed_url);
        $selfLink->setAttribute('rel', 'self');
        $selfLink->setAttribute('type', 'application/rss+xml');
        $channel->appendChild($selfLink);

        // <link/>
        $siteLink = $this->dom->createElement('link');
        $siteLink->appendChild($this->dom->createTextNode($this->site_url));
        $channel->appendChild($siteLink);

        // <webMaster/>
        if (isset($this->author)) {
            $this->addAuthor($channel, 'webMaster', $this->author);
        }

        // <item/>
        foreach ($this->items as $item) {
            $this->checkRequiredProperties($this->required_item_properties, $item);
            $channel->appendChild($this->createEntry($item));
        }

        $root->appendChild($channel);
        $this->dom->appendChild($root);

        if (!$filename) {
            return $this->dom->saveXML();
        }

        $this->dom->save($filename);
    }

    /**
     * Create item entry.
*
     * Builds an "item" element with, in order: title, link, guid, pubDate,
     * description (optional), content:encoded (optional) and author (optional).
     *
     * @param array $item Item properties
     *
     * @return DomElement
     */
    public function createEntry(array $item)
    {
        $entry = $this->dom->createElement('item');

        // <title/>
        $titleNode = $this->dom->createElement('title');
        $titleNode->appendChild($this->dom->createTextNode($item['title']));
        $entry->appendChild($titleNode);

        // <link/>
        $linkNode = $this->dom->createElement('link');
        $linkNode->appendChild($this->dom->createTextNode($item['url']));
        $entry->appendChild($linkNode);

        // <guid/> the item URL is used as permalink when there is no id
        $hasId = isset($item['id']);

        $guidNode = $this->dom->createElement('guid');
        $guidNode->setAttribute('isPermaLink', $hasId ? 'false' : 'true');
        $guidNode->appendChild($this->dom->createTextNode($hasId ? $item['id'] : $item['url']));
        $entry->appendChild($guidNode);

        // <pubDate/>
        if (isset($item['updated'])) {
            $updated = $item['updated'];
        } else {
            $updated = '';
        }

        $this->addPubDate($entry, $updated);

        // <description/>
        if (isset($item['summary'])) {
            $descriptionNode = $this->dom->createElement('description');
            $descriptionNode->appendChild($this->dom->createTextNode($item['summary']));
            $entry->appendChild($descriptionNode);
        }

        // <content/>
        if (isset($item['content'])) {
            $contentNode = $this->dom->createElement('content:encoded');
            $contentNode->appendChild($this->dom->createCDATASection($item['content']));
            $entry->appendChild($contentNode);
        }

        // <author/>
        if (isset($item['author'])) {
            $this->addAuthor($entry, 'author', $item['author']);
        }

        return $entry;
    }

    /**
     * Add publication date.
     *
     * @param DomElement $xml   XML node
     * @param int        $value Timestamp, the current time is used when empty
     */
    public function addPubDate(DomElement $xml, $value = 0)
    {
        $timestamp = $value ?: time();
        $node = $this->dom->createElement('pubDate', date(DATE_RSS, $timestamp));

        $xml->appendChild($node);
    }

    /**
     * Add author.
* * @param DomElement $xml XML node * @param string $tag Tag name * @param array $values Author name and email */ public function addAuthor(DomElement $xml, $tag, array $values) { $value = ''; if (isset($values['email'])) { $value .= $values['email']; } if ($value && isset($values['name'])) { $value .= ' ('.$values['name'].')'; } if ($value) { $author = $this->dom->createElement($tag); $author->appendChild($this->dom->createTextNode($value)); $xml->appendChild($author); } } } ������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������picoFeed-0.1.18/lib/PicoFeed/Syndication/Writer.php�������������������������������������������������0000664�0000000�0000000�00000003175�12656251722�0022032�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������<?php namespace PicoFeed\Syndication; use RuntimeException; /** * Base writer class. * * @author Frederic Guillot */ abstract class Writer { /** * Dom object. * * @var \DomDocument */ protected $dom; /** * Items. * * @var array */ public $items = array(); /** * Author. * * @var array */ public $author = array(); /** * Feed URL. * * @var string */ public $feed_url = ''; /** * Website URL. * * @var string */ public $site_url = ''; /** * Feed title. * * @var string */ public $title = ''; /** * Feed description. * * @var string */ public $description = ''; /** * Feed modification date (timestamp). * * @var int */ public $updated = 0; /** * Generate the XML document. 
* * @abstract * * @param string $filename Optional filename * * @return string */ abstract public function execute($filename = ''); /** * Check required properties to generate the output. * * @param array $properties List of properties * @param mixed $container Object or array container */ public function checkRequiredProperties(array $properties, $container) { foreach ($properties as $property) { if ((is_object($container) && !isset($container->$property)) || (is_array($container) && !isset($container[$property]))) { throw new RuntimeException('Required property missing: '.$property); } } } } ���������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������picoFeed-0.1.18/picofeed����������������������������������������������������������������������������0000775�0000000�0000000�00000005246�12656251722�0015042�0����������������������������������������������������������������������������������������������������ustar�00root����������������������������root����������������������������0000000�0000000������������������������������������������������������������������������������������������������������������������������������������������������������������������������#!/usr/bin/env php <?php require_once 'vendor/autoload.php'; use PicoFeed\Config\Config; use PicoFeed\Scraper\Scraper; use PicoFeed\Reader\Reader; use PicoFeed\Logging\Logger; use PicoFeed\PicoFeedException; Logger::enable(); function get_feed($url, $disable_filtering = false) { try { $reader = new Reader; $resource = $reader->discover($url); $parser = $reader->getParser( $resource->getUrl(), $resource->getContent(), $resource->getEncoding() ); if ($disable_filtering) { 
$parser->disableContentFiltering();
        }

        return $parser->execute();
    }
    catch (PicoFeedException $e) {
        echo 'Exception thrown ===> "'.$e->getMessage().'"'.PHP_EOL;
        return false;
    }
}

/**
 * Print the first item of the feed whose id equals $item_id, followed by its content.
 */
function get_item($feed, $item_id)
{
    foreach ($feed->items as $candidate) {
        if ($candidate->getId() !== $item_id) {
            continue;
        }

        echo $candidate;
        echo "============= CONTENT ================\n";
        echo $candidate->getContent();
        echo "\n============= CONTENT ================\n";

        return;
    }
}

/**
 * Print the parsed feed.
 */
function dump_feed($url)
{
    echo get_feed($url);
}

/**
 * Parse the feed and print the logger messages.
 */
function debug_feed($url)
{
    get_feed($url);
    print_r(Logger::getMessages());
}

/**
 * Print one item of the feed.
 */
function dump_item($url, $item_id)
{
    $feed = get_feed($url);

    if ($feed === false) {
        return;
    }

    get_item($feed, $item_id);
}

/**
 * Print one item of the feed, parsed with content filtering disabled.
 */
function nofilter_item($url, $item_id)
{
    $feed = get_feed($url, true);

    if ($feed === false) {
        return;
    }

    get_item($feed, $item_id);
}

/**
 * Run the scraper on a URL and print the log, the relevant content and the filtered content.
 */
function grabber($url)
{
    $scraper = new Scraper(new Config);
    $scraper->setUrl($url);
    $scraper->execute();

    print_r(Logger::getMessages());

    echo "============= CONTENT ================\n";
    echo $scraper->getRelevantContent().PHP_EOL;
    echo "============= FILTERED ================\n";
    echo $scraper->getFilteredContent().PHP_EOL;
}

// Parse command line arguments
if ($argc === 4) {
    if ($argv[1] === 'item') {
        dump_item($argv[2], $argv[3]);
        die;
    }

    if ($argv[1] === 'nofilter') {
        nofilter_item($argv[2], $argv[3]);
        die;
    }
} elseif ($argc === 3) {
    if ($argv[1] === 'feed') {
        dump_feed($argv[2]);
        die;
    }

    if ($argv[1] === 'debug') {
        debug_feed($argv[2]);
        die;
    }

    if ($argv[1] === 'grabber') {
        grabber($argv[2]);
        die;
    }
}

// No command matched: print the usage.
printf("Usage:\n");

$usage = array(
    "%s feed <feed-url>\n",
    "%s debug <feed-url>\n",
    "%s item <feed-url> <item-id>\n",
    "%s nofilter <feed-url> <item-id>\n",
    "%s grabber <url>\n",
);

foreach ($usage as $template) {
    printf($template, $argv[0]);
}
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
��������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������
������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������������