<?php
namespace news\system\cronjob;

use news\data\news\NewsAction;
use news\data\rss\feed\RssFeed;
use news\data\rss\feed\RssFeedEditor;
use news\data\rss\feed\RssFeedList;
use wcf\data\cronjob\Cronjob;
use wcf\data\user\User;
use wcf\system\cronjob\AbstractCronjob;
use wcf\system\database\exception\DatabaseQueryException;
use wcf\system\database\exception\DatabaseQueryExecutionException;
use wcf\system\exception\LoggedException;
use wcf\system\exception\SystemException;
use wcf\system\html\input\HtmlInputProcessor;
use wcf\system\WCF;
use wcf\util\ArrayUtil;
use wcf\util\HTTPRequest;
use wcf\util\MessageUtil;
use wcf\util\XML;


/**
 * Creates news entries from the configured RSS and Atom feeds.
 *
 * @author    {COPYRIGHT_AUTHOR}
 * @copyright {COPYRIGHT_COMPANY}
 * @license   {COPYRIGHT_LICENSE}
 * @package   {COPYRIGHT_PACKAGE}
 * @category  {COPYRIGHT_CATEGORY}
 */
class RSSFeedReaderCronjob extends AbstractCronjob {
	/**
	 * Imports the new items of every enabled feed that is due and records each
	 * imported item in the feed log.
	 *
	 * A feed is due when `lastRun + cycleTime` lies before TIME_NOW. `lastRun` is
	 * updated for every processed feed, including feeds that yielded no items or
	 * raised a LoggedException.
	 *
	 * @inheritdoc
	 */
	public function execute(Cronjob $cronjob) {
		parent::execute($cronjob);
		
		$sql = "INSERT INTO	news" . WCF_N . "_rss_feed_log
					(feedID, hash, newsID)
			VALUES		(?, ?, ?)";
		$logStatement = WCF::getDB()->prepareStatement($sql);
		
		// get feeds
		$feedList = new RssFeedList();
		$feedList->getConditionBuilder()->add('isDisabled = ?', [0]);
		$feedList->getConditionBuilder()->add('lastRun + cycleTime < ?', [TIME_NOW]);
		$feedList->readObjects();
		
		foreach ($feedList as $feed) {
			try {
				$this->importFeed($feed, $logStatement);
			}
			catch (LoggedException $e) {
				$e->getExceptionID(); // log error
			}
			
			// update last run; this must also happen for feeds without importable
			// items, otherwise they would be fetched again on every cronjob run
			$editor = new RssFeedEditor($feed);
			$editor->update(['lastRun' => TIME_NOW]);
		}
	}
	
	/**
	 * Fetches a single feed, creates a news entry for each new item and writes
	 * one log row (feedID, hash, newsID) per created entry.
	 *
	 * Returns early without creating anything if the document has no root node,
	 * the root node is neither `rss` nor `feed`, or no item passes the filters.
	 *
	 * @param RssFeed $feed
	 * @param mixed   $logStatement prepared INSERT statement for the feed log
	 *
	 * @throws SystemException
	 */
	private function importFeed(RssFeed $feed, $logStatement) {
		// get content
		$request = new HTTPRequest($feed->url);
		$request->execute();
		$result = $request->getReply();
		$content = $result['body'];
		
		// parse xml
		$xml = new XML();
		$xml->loadXML($feed->url, $content);
		$xpath = $xml->xpath();
		
		$rootNode = $xpath->query('/*')->item(0);
		if ($rootNode === null) {
			return;
		}
		
		// only RSS (<rss>) and Atom (<feed>) documents are supported
		if ($rootNode->nodeName != 'feed' && $rootNode->nodeName != 'rss') {
			return;
		}
		$rss = ($rootNode->nodeName != 'feed');
		
		$data = $this->readFeed($feed, $rss, $xpath);
		if (empty($data)) {
			return;
		}
		
		// get user
		$user = new User($feed->userID);
		
		// get tags
		$feedTags = [];
		if (MODULE_TAGGING && $feed->newsTags) {
			$feedTags = array_unique(ArrayUtil::trim(explode(',', $feed->newsTags)));
		}
		
		foreach ($data as $item) {
			$tags = $feedTags;
			if ($feed->useCategoriesAsTags && !empty($item['categories'])) {
				$tags = array_unique(array_merge($tags, $item['categories']));
			}
			
			$newsID = $this->createNews($feed, $user, $item, $tags);
			
			// create log entry
			$logStatement->execute([$feed->feedID, $item['hash'], $newsID]);
		}
	}
	
	/**
	 * Reads a feed and returns the items that should be imported.
	 *
	 * Items are skipped when they lack required fields, are dated in the future,
	 * are older than the feed's `maxAge` (in days), are already present in the
	 * feed log, or do not pass the positive/negative keyword filters. Reading
	 * stops once `maxResults` items were collected (if set).
	 *
	 * @param RssFeed   $feed
	 * @param boolean   $rss   true for RSS documents, false for Atom documents
	 * @param \DOMXPath $xpath
	 *
	 * @return array items indexed by hash, each with the keys
	 *               title, link, description, time, hash and categories
	 * @throws DatabaseQueryException
	 * @throws DatabaseQueryExecutionException
	 */
	protected function readFeed(RssFeed $feed, $rss, \DOMXPath $xpath) {
		$keywords = [];
		if ($feed->searchKeywords) {
			$keywords = array_unique(ArrayUtil::trim(preg_split('/[,;]/', mb_strtolower($feed->searchKeywords))));
		}
		$keywordsNegative = [];
		if ($feed->searchKeywordsNegative) {
			$keywordsNegative = array_unique(ArrayUtil::trim(preg_split('/[,;]/', mb_strtolower($feed->searchKeywordsNegative))));
		}
		
		$items = $xpath->query($this->getXMLPath($rss, 'items'));
		$data = [];
		foreach ($items as $item) {
			// collected freshly per item so that no values of a previous item leak in
			$itemData = $this->readItemData($rss, $xpath, $item);
			
			// required fields
			if (empty($itemData['title'])) {
				continue;
			}
			if ($rss) {
				if (empty($itemData['description']) && empty($itemData['content:encoded'])) {
					continue;
				}
			}
			else {
				if (empty($itemData['id']) || empty($itemData['link']) || (empty($itemData['content']) && empty($itemData['summary']))) {
					continue;
				}
			}
			
			$time = $this->getItemTime($rss, $itemData);
			if ($time > TIME_NOW) continue;
			
			// items without a usable date got TIME_NOW and are never considered too old
			if ($feed->maxAge != 0 && $time != TIME_NOW) {
				if (TIME_NOW > ($time + ($feed->maxAge * 24 * 60 * 60))) {
					continue;
				}
			}
			
			// prefer the full content, fall back to the summary/description;
			// the required-field check above guarantees that one of them is set
			$pathDescription = $this->getXMLPath($rss, 'description');
			if (empty($itemData[$pathDescription])) {
				$pathDescription = $this->getXMLPath($rss, 'descriptionAlter');
			}
			$description = $itemData[$pathDescription];
			
			$hash = $this->getHash($itemData, $description, $rss);
			if ($this->checkLog($feed->feedID, $hash)) {
				continue;
			}
			
			// check search words
			if (!empty($keywords) || !empty($keywordsNegative)) {
				$haystack = mb_strtolower($itemData['title'] . $description);
				
				// at least one positive keyword has to match
				if (!empty($keywords) && !$this->containsAnyKeyword($haystack, $keywords)) {
					continue;
				}
				
				// no negative keyword may match
				if (!empty($keywordsNegative) && $this->containsAnyKeyword($haystack, $keywordsNegative)) {
					continue;
				}
			}
			
			$data[$hash] = [
				'title'       => $itemData['title'],
				'link'        => (!empty($itemData['link']) ? $itemData['link'] : ''),
				'description' => $description,
				'time'        => $time,
				'hash'        => $hash,
				'categories'  => $itemData['categories']
			];
			
			if ($feed->maxResults && count($data) == $feed->maxResults) {
				break;
			}
		}
		
		return $data;
	}
	
	/**
	 * Collects the child elements of a single feed item into an array that is
	 * indexed by node name.
	 *
	 * The key `categories` always holds an array with the item's categories.
	 * For Atom entries the `link` key holds the `href` of the first link whose
	 * relation is `alternate`.
	 *
	 * @param boolean   $rss
	 * @param \DOMXPath $xpath
	 * @param \DOMNode  $item
	 *
	 * @return array
	 */
	private function readItemData($rss, \DOMXPath $xpath, \DOMNode $item) {
		$itemData = [];
		$categories = [];
		
		$childNodes = $xpath->query($this->getXMLPath($rss, 'child'), $item);
		foreach ($childNodes as $childNode) {
			$nodeName = $childNode->nodeName;
			
			if ($nodeName == 'category') {
				$category = $childNode->nodeValue;
				if (!$rss) {
					// Atom stores the category in the `term` attribute (RFC 4287, 4.2.2)
					$term = $childNode->attributes->getNamedItem('term');
					if ($term !== null) {
						$category = $term->nodeValue;
					}
				}
				
				if ($category !== '') {
					$categories[] = $category;
				}
			}
			else if (!$rss && $nodeName == 'link') {
				$href = $childNode->attributes->getNamedItem('href');
				if (!isset($itemData[$nodeName]) && $href !== null) {
					// a missing `rel` attribute means `alternate` (RFC 4287, 4.2.7.2)
					$rel = $childNode->attributes->getNamedItem('rel');
					if ($rel === null || $rel->nodeValue == 'alternate') {
						$itemData[$nodeName] = $href->nodeValue;
					}
				}
			}
			else {
				$itemData[$nodeName] = $childNode->nodeValue;
			}
		}
		
		$itemData['categories'] = $categories;
		
		return $itemData;
	}
	
	/**
	 * Returns the publication timestamp of an item.
	 *
	 * RSS items use `pubDate`; Atom entries use `published` or, if that is not
	 * set, `updated`. TIME_NOW is returned if no date is given or it cannot be
	 * parsed by strtotime().
	 *
	 * @param boolean $rss
	 * @param array   $itemData
	 *
	 * @return integer
	 */
	private function getItemTime($rss, array $itemData) {
		$time = false;
		if ($rss) {
			if (isset($itemData['pubDate'])) {
				$time = strtotime($itemData['pubDate']);
			}
		}
		else if (isset($itemData['published'])) {
			$time = strtotime($itemData['published']);
		}
		else if (isset($itemData['updated'])) {
			$time = strtotime($itemData['updated']);
		}
		
		return $time ?: TIME_NOW;
	}
	
	/**
	 * Returns true if the haystack contains at least one of the given keywords.
	 *
	 * @param string   $haystack lower-cased text to search in
	 * @param string[] $keywords lower-cased keywords
	 *
	 * @return boolean
	 */
	private function containsAnyKeyword($haystack, array $keywords) {
		foreach ($keywords as $keyword) {
			if (mb_strpos($haystack, $keyword) !== false) {
				return true;
			}
		}
		
		return false;
	}
	
	/**
	 * Returns the XPath expression or item field name for the given query.
	 *
	 * Supported queries are `items` and `child` (XPath expressions) as well as
	 * `description` and `descriptionAlter` (names of the preferred and the
	 * fallback content element). An empty string is returned for any other query.
	 *
	 * NOTE(review): the Atom item path relies on the `ns` prefix being registered
	 * on the DOMXPath object; presumably XML::xpath() does that - confirm.
	 *
	 * @param    bool   $rss   true for RSS, false for Atom
	 * @param    string $query
	 *
	 * @return string
	 */
	private function getXMLPath($rss, $query) {
		if ($rss) {
			$paths = [
				'items'            => '//channel/item',
				'child'            => 'child::*',
				'description'      => 'content:encoded',
				'descriptionAlter' => 'description'
			];
		}
		else {
			$paths = [
				'items'            => '//ns:entry',
				'child'            => 'child::*',
				'description'      => 'content',
				'descriptionAlter' => 'summary'
			];
		}
		
		return isset($paths[$query]) ? $paths[$query] : '';
	}
	
	/**
	 * Returns the hash that identifies an item within its feed.
	 *
	 * RSS items are identified by their guid, then by their link and as a last
	 * resort by title and description. Atom entries are identified by their id.
	 *
	 * @param array   $itemData
	 * @param string  $description
	 * @param boolean $rss
	 *
	 * @return string
	 */
	private function getHash($itemData, $description, $rss) {
		if (!$rss) {
			return sha1($itemData['id']);
		}
		
		if (!empty($itemData['guid'])) {
			return sha1($itemData['guid']);
		}
		if (!empty($itemData['link'])) {
			return sha1($itemData['link']);
		}
		
		return sha1($itemData['title'] . $description);
	}
	
	/**
	 * Returns true if the feed log already contains the given hash for the
	 * given feed.
	 *
	 * @param integer $feedID
	 * @param string  $hash
	 *
	 * @return bool
	 * @throws DatabaseQueryException
	 * @throws DatabaseQueryExecutionException
	 */
	private function checkLog($feedID, $hash) {
		$sql = "SELECT	count(*) as counter
					FROM	news" . WCF_N . "_rss_feed_log WHERE hash = ? AND feedID = ?";
		$statement = WCF::getDB()->prepareStatement($sql);
		$statement->execute([$hash, $feedID]);
		$row = $statement->fetchArray();
		
		return ($row["counter"] != 0);
	}
	
	/**
	 * Creates a news entry for the given feed item and returns its id.
	 *
	 * The item's description is passed through the HtmlInputProcessor before it
	 * is stored; the subject is limited to 255 characters.
	 *
	 * @param RssFeed $feed
	 * @param User    $user
	 * @param array   $item item as returned by readFeed()
	 * @param array   $tags
	 *
	 * @throws SystemException
	 *
	 * @return integer
	 */
	private function createNews($feed, $user, $item, $tags) {
		$htmlInputProcessor = new HtmlInputProcessor();
		$htmlInputProcessor->process($item['description'], "de.wbb-elite.news.message");
		
		$data = [
			'subject'        => mb_substr(MessageUtil::stripCrap($item['title']), 0, 255),
			'time'           => $item['time'],
			'userID'         => $feed->userID,
			'teaser'         => '',
			'isMultilingual' => 0,
			'enableHtml'     => 1,
			'username'       => $user->username,
			'isDisabled'     => $feed->disableNews,
			'text'           => $htmlInputProcessor->getHtml()
		];
		
		$action = new NewsAction([], 'create', [
			'data'        => $data,
			'categoryIDs' => array_unique(ArrayUtil::trim(unserialize($feed->categoryIDs))),
			'tags'        => $tags,
			'sources'     => $this->getSource($feed, $item['link'])
		]);
		$resultValues = $action->executeAction();
		
		return $resultValues['returnValues']->newsID;
	}
	
	/**
	 * Returns the source list for a news entry.
	 *
	 * The list contains a single source (feed title and item link) if
	 * DE_WBB_ELITE_NEWS_SOURCE_ENABLE is set, otherwise it is empty.
	 *
	 * @param RssFeed $feed
	 * @param string  $link
	 *
	 * @return mixed[]
	 */
	private function getSource($feed, $link) {
		if (!DE_WBB_ELITE_NEWS_SOURCE_ENABLE) {
			return [];
		}
		
		return [
			'0' => [
				'sourceSubject' => $feed->title,
				'sourceUrl'     => MessageUtil::stripCrap($link)
			]
		];
	}
}
