// import $ from "jquery";
import cheerio from "cheerio";
import {isEmpty} from "../../../../utils/generalUtils";



// "mark" html to use in regex replace
const markReplace = `<mark tabindex="0" data-index="0" class="search-term-mark">$&</mark>`;

// Patterns that locate the entity-encoded pieces of an inserted mark tag
// (opening fragment, end of the opening tag, closing tag) so each piece can be
// swapped back for real html.
const regExpMarks = {
	markStart: /&lt;mark tabindex/g,
	markMid: /search-term-mark"&gt;/g,
	markEnd: /&lt;\/mark&gt;/g,
};

// The tag lists in this file were generated from: https://way2tutorial.com/html/tag/index.php

// UNUSED: kept for information purposes only; tags likely to contain matchable text.
// Each tag appears exactly once; the result is a comma-separated selector string.
const markTags = [
	'abbr',
	'article',
	'aside',
	'b',
	'blockquote',
	'caption',
	'cite',
	'div',
	'em',
	'figcaption',
	'footer',
	'header',
	'h1',
	'h2',
	'h3',
	'h4',
	'h5',
	'h6',
	'i',
	'label',
	'li',
	'p',
	'q',
	's',
	'section',
	'small',
	'span',
	'strong',
	'sub',
	'sup',
	'tr',
	'u'
].join(',');


// Tags whose text must never be highlighted. The list is joined into one
// comma-separated selector and negated below, so that only elements matching
// NONE of these tags are selected when looking for text to mark.
const excludeTags = [
	'html', 'head', 'body',
	'area', 'audio',
	'base', 'bdi', 'bdo', 'button',
	'canvas', 'code', 'col', 'colgroup', 'command',
	'data', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dl', 'dt',
	'embed',
	'fieldset', 'figure', 'form',
	'iframe', 'img', 'input', 'ins',
	'kbd', 'keygen',
	'legend', 'link',
	'map', 'mark', 'menu', 'meta', 'meter',
	'nav', 'noscript',
	'object', 'ol', 'optgroup', 'option', 'output',
	'param', 'progress',
	'samp', 'script', 'select', 'source', 'style',
	'table', 'tbody', 'template', 'textarea', 'tfoot', 'thead', 'time', 'title', 'track',
	'ul',
	'var', 'video',
	'wbr'
].join(',');
// selector matching every element that is not one of the excluded tags
const notSelector = `:not(${excludeTags})`;


// Common words that are never highlighted: a search word found in this list
// (compared in lower case by the caller) is dropped before matching.
const excludeMatch = [
	'a', 'an', 'and', 'are', 'as', 'at',
	'be', 'but', 'by',
	'for',
	'in', 'into', 'is', 'it',
	'not',
	'of', 'on', 'or',
	'the', 'then', 'to',
	'with'
];


/**
 * Highlight search matches inside article markup by wrapping them in "mark" tags.
 *
 * If we have searched for an "exact" match, the whole search string is looked for in the article text.
 * If we have searched for an "any" or "all" match, the individual words of the search string are looked for.
 * In both cases the search text is matched LITERALLY (case-insensitive): regular expression
 * metacharacters typed by the user (for example "c++" or "a.b") are escaped first.
 *
 * Steps:
 *     any/all words: split the search string on whitespace, drop empty tokens and any word that
 *         is in the excludeMatch list
 *     build one case-insensitive, global regex from the remaining term(s)
 *     if the content is marked as 'text' then wrap it in a div to generate html markup
 *     load the markup into cheerio (fragment mode)
 *     select all the elements that DON'T match the excludeTags list and keep only their
 *         direct TEXT child nodes
 *     replace every match in those text nodes with the "mark" replacement pattern
 *     serialize: inner html of the wrapper for 'text', html of the root otherwise
 *     the inserted mark tags come out entity-encoded (&lt; / &gt;), so convert them back into real tags
 *
 * NOTE(review): the final conversion is global, so article text that already contains a literal,
 * entity-encoded "&lt;/mark&gt;" would presumably be converted as well — confirm whether that matters.
 *
 * @param params
 *     articleContent: html string for article
 *     searchTerm: search term string
 *     type: 'html' or 'text'; how to treat the articleContent (anything other than 'text' is handled as html)
 *     exact: true: match the whole search term string; false: match individual words
 *
 * @returns {*|string} the markup with matches wrapped in mark tags, or the untouched
 *     articleContent when there is nothing left to search for
 */
const markMatches = (params) => {
	params = Object.assign({
		articleContent: '',
		searchTerm: '',
		type: 'html',
		exact: false
	}, params);

	// escape regex metacharacters so the user's text is matched literally and cannot break the pattern
	const escapeRegExp = (text) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

	// if exact, use the whole string; else split into words and remove empty/excluded search terms.
	// Empty tokens (from repeated, leading or trailing whitespace) MUST be dropped: an empty
	// alternative in the regex would match at every position of the text.
	const terms = params.exact
		? escapeRegExp(params.searchTerm)
		: params.searchTerm
			.split(/\s+/)
			.filter(term => term !== '' && !excludeMatch.includes(term.toLowerCase()))
			.map(escapeRegExp)
			.join('|');
	if (isEmpty(terms)) {
		return params.articleContent;
	}
	const matchTerms = new RegExp(terms, "gi");

	// if "text" type content wrap in div for cheerio processing
	let articleMarkup = params.type === 'text' ? '<div>' + params.articleContent + '</div>' : params.articleContent;
	// third argument false: parse as a fragment instead of a full document
	const $ = cheerio.load(articleMarkup, null, false);
	// create article markup from all the article content
	const $articleMarkup = $('*');
	// select only text content nodes from markup not in the exclude list
	const $textNodes = $articleMarkup.find(notSelector).addBack(notSelector)
		.contents()
		.filter((index, node) => {
			return node.type==='text';
		});

	// loop through all the text content nodes in the cheerio object and replace text for marking
	$textNodes.each((index, elem) => {
		elem.data = elem.data.replace(matchTerms, markReplace);
	});

	// get html string; inner for text; outer for html
	articleMarkup = params.type === 'text' ? $articleMarkup.html() : $.root().html();
	// replace encoded tag markup with real tags, for example &lt; with <
	articleMarkup = articleMarkup.replace(regExpMarks.markStart, '<mark tabindex').replace(regExpMarks.markMid, 'search-term-mark">').replace(regExpMarks.markEnd, '</mark>');

	return articleMarkup;
};


/**
 * Mark the search terms inside the requested attributes of an article so they can
 * be highlighted in the generated article.
 *
 * The article object is updated IN PLACE (each processed attribute is overwritten with
 * its marked-up version) and the same object is returned.
 *
 * Attributes that the article does not hold as a string are skipped: there is no text to
 * mark, and passing them on would either fail to parse or end up as the literal text
 * "undefined"/"null" in the output.
 *
 * @param params article and attributes to markup
 *     article: article object
 *     attributes: which parts of the article to markup; 'title' is processed as plain text,
 *         every other attribute (such as 'content') is processed as html
 *     searchFilters: search filters as set by search modules
 *         entry: the search string; when empty nothing is marked
 *         type: "2" selects an exact (whole string) match, anything else matches individual words
 * @returns {*} article with marked-up attributes
 */
const generateSearchMarkers = (params) => {
	params = Object.assign({
		article: {},
		attributes: [],
		searchFilters: {}
	}, params);
	const {article, attributes, searchFilters} = params;

	// nothing was searched for: return the article untouched
	if (isEmpty(searchFilters.entry)) {
		return article;
	}

	// provide mappings for attribute parts for markup
	const contentType = {'content': 'html', 'title': 'text'};
	// loop through article sections for markup
	attributes.forEach((attribute) => {
		// only string values can be marked up; leave anything else as it is
		if (typeof article[attribute] !== 'string') {
			return;
		}
		article[attribute] = markMatches({
			articleContent: article[attribute],
			searchTerm: searchFilters.entry,
			type: contentType[attribute],
			exact: searchFilters.type === '2'  // type: "2" === "exact"
		});
	});
	return article;
};
export {generateSearchMarkers};
