/*########################## textTools.js ############################*/





// Tanzil Text Tools

// By: Hamid Zarrabi-Zadeh

// http://tanzil.info



// Licensed under GPL





//---------------------- Text Tools -----------------------



function TextTools()
{
	// Every table below is an ordered list of [from, to] pairs written as
	// symbolic regular expressions ("$NAME" tokens are resolved by regTrans()).
	// applyRules() runs the pairs one after another, so order matters.

	// Matching rules, applied by enrichPattern() to widen a search pattern.
	// "DD" and "FF" are temporary placeholders: an earlier pair produces them
	// and a later pair expands them. "W", "A", "Y" and "O" are the placeholders
	// inserted by addSilentLetters(); here they become optional letters.
	// NOTE(review): in "[$HAMZA|$HAMZA_ABOVE]" the "|" sits inside a character
	// class, so it is a literal pipe rather than an alternation - confirm intent.
	this.matchingRules = [
		["$HAMZA_SHAPE", "$HAMZA_SHAPE"],
		["$ALEF_WITH_HAMZA_BELOW", "DD"],
		["$YEH_WITH_HAMZA", "FF"],
		["$ALEF_MAKSURA", "$FATHA$ALEF_MAKSURA"],
		//["$ALEF", "[$ALEF$ALEF_MAKSURA$ALEF_WITH_MADDA_ABOVE$ALEF_WITH_HAMZA_ABOVE$ALEF_WITH_HAMZA_BELOW$ALEF_WASLA$SUPERSCRIPT_ALEF$ALEF_FARSI_MAD]"],
		["$ALEF", "[$ALEF$SUPERSCRIPT_ALEF$ALEF_WASLA$ALEF_FARSI_MAD]"],
		["$KAF", "[$KAF$KAF_FARSI]"],
		["[$TEH$MARBUTA]", "[$TEH$MARBUTA]"],
		["$HEH", "[$HEH$FARSI_HEH$MARBUTA]"],
		["$WAW", "[$WAW$WAW_WITH_HAMZA_ABOVE$SMALL_WAW]"],
		["$YEH", "[$YEH$ALEF_MAKSURA$SMALL_YEH$SMALL_D_YEH]"],
		["$ALEF_WITH_MADDA_ABOVE", "($ALEF_WITH_MADDA_ABOVE|$ALEF_WITH_HAMZA_ABOVE|[$HAMZA|$HAMZA_ABOVE]$HARAKA*$ALEF)"],
		["DD", "[$ALEF_WITH_HAMZA_BELOW$YEH_WITH_HAMZA]"],
		["FF", "[$ALEF_WITH_HAMZA_BELOW$HAMZA]"],
		[" ", "$SPACE"],
		["W", "$WAW{0,1}"],
		["A", "$ALEF{0,1}"],
		["Y", "$YEH{0,1}"],
		["O", "$MAGRAAHA{0,1}"],
		["G", "$YEH$SUKUN|$ALEF$SUKUN"]
	];

	// Wildcard normalisation: the characters ".", "*" and "?" / "؟" become the
	// placeholders P, S and Q, and any run of Q/S that contains an S collapses
	// to a single S.
	// NOTE(review): inside a string literal "\s" is just the letter "s", so the
	// last pair compiles to /^s*[QS]*/ and does not match whitespace. If leading
	// whitespace was meant, the literal would need "\\s" - confirm before changing.
	this.wildcardRegs = [
		["\\.", "P"],
		["\\*", "S"],
		["[?؟]", "Q"],
		["[QS]*S[QS]*", "S"],
		["^\s*[QS]*", ""]
	];

	// Wildcard expansion: turns the S / Q / P placeholders into symbolic regexps.
	this.wildcards = [
		["S", "$LETTER_HARAKA*"],
		//["S", "($LETTER|$HARAKA)*"],
		["Q", "$LETTER?"],
		["P", "$LETTER"]
	];

	// Pre-processing: rewrites the listed letter variants to $YEH and $KAF.
	this.preProcess = [
		["[$FARSI_YEH$YEH_BARREE]", "$YEH"],
		["[$FARSI_KEHEH$SWASH_KAF]", "$KAF"]
	];

	// Init: resolve the symbolic names inside every UGroups entry. UGroups is
	// defined outside this function and is overwritten in place.
	for (var groupName in UGroups)
	{
		UGroups[groupName] = this.regTrans(UGroups[groupName]);
	}
}





//---------------------- General Functions -----------------------





// translate a symbolic regExp

TextTools.prototype.regTrans = function(str)
{
	// Expands every "$NAME" token (NAME = upper-case letters and underscores)
	// in str. The name is looked up in UGroups first, then in UChars; a name
	// found in neither (or mapped to a falsy value) expands to the empty string.
	// Tokens such as "$1" are left alone because digits are not part of a name.
	var symbolToken = /\$([A-Z_]+)/g;

	function expandSymbol(wholeMatch, symbolName)
	{
		return UGroups[symbolName] || UChars[symbolName] || '';
	}

	return str.replace(symbolToken, expandSymbol);
};





// simulate preg_replace 

TextTools.prototype.pregReplace = function(fromExp, toExp, str)
{
	// Global regexp replace on str. Both the search expression and the
	// replacement are passed through regTrans() first, so they may contain
	// "$NAME" symbols; "$1"-style back-references in toExp keep working
	// because String.prototype.replace interprets them.
	var matcher = new RegExp(this.regTrans(fromExp), 'g');
	var replacement = this.regTrans(toExp);

	return str.replace(matcher, replacement);
};





// apply a set of rules to a string

TextTools.prototype.applyRules = function(rules, str)
{
	// Applies an ordered list of [from, to] rule pairs to str. Each pair is
	// run through pregReplace() on the result of the previous pair.
	//
	// rules : array of two-element arrays [fromExp, toExp]
	// str   : string to transform
	// Returns the transformed string (str unchanged when rules is empty or missing).
	if (!rules) return str;

	// Indexed loop rather than for...in: for...in would also visit enumerable
	// properties that other scripts may have added to Array.prototype.
	for (var i = 0; i < rules.length; i++)
		str = this.pregReplace(rules[i][0], rules[i][1], str);

	return str;
};





//---------------------- Number Functions -----------------------





// convert english digits to arabic-indic

TextTools.prototype.arabicNumber = function(str)
{
	// Replaces each ASCII digit 0-9 in String(str) with the character at
	// U+0660 + digit. All other characters are kept unchanged.
	var zeroCodePoint = 0x0660;

	return String(str).replace(/[0-9]/g, function(digit)
	{
		return String.fromCharCode(zeroCodePoint + Number(digit));
	});
};



// convert english digits to farsi

TextTools.prototype.farsiNumber = function(str)
{
	// Replaces each ASCII digit 0-9 in String(str) with the character at
	// U+06F0 + digit. All other characters are kept unchanged.
	var zeroCodePoint = 0x06F0;

	return String(str).replace(/[0-9]/g, function(digit)
	{
		return String.fromCharCode(zeroCodePoint + Number(digit));
	});
};



// returns Arabic name of a number (1-39)

TextTools.prototype.arabicNumberName = function(num)
{
	// Returns the Arabic name for num. Values below 1 or from 40 upwards give
	// the empty string. 1-10 come straight from the first table; above 10 the
	// result is the units name (if any) followed by the tens name, joined by a
	// space for 11-19 and by " و" for 21-39.
	var unitNames = ['الاول', 'الثاني', 'الثالث', 'الرابع', 'الخامس', 'السادس', 'السابع', 'الثامن', 'التاسع', 'العاشر'];
	var tensNames = ['عشر', 'العشرون', 'الثلاثون'];

	if (num < 1 || num >= 40) return '';
	if (num <= 10) return unitNames[num - 1];

	var units = num % 10;
	var tens = Math.floor(num / 10);
	var prefix = '';

	if (units > 0)
	{
		// in compound numbers a units digit of 1 uses a different word
		var unitWord = (units === 1) ? 'الحادي' : unitNames[units - 1];
		var joiner = (tens > 1) ? ' و' : ' ';
		prefix = unitWord + joiner;
	}

	return prefix + tensNames[tens - 1];
};





//--------------------- Text Modification --------------------





// revise text according to args

TextTools.prototype.fixText = function(text, args)
{
	// Revises text according to the display options in args (all optional):
	//   showSigns           - wrap the signs in <span> markup instead of removing them
	//   ignoreInternalSigns - with showSigns: remove the RUB_EL_HIZB / SAJDAH signs
	//   showSmallAlef       - keep SUPERSCRIPT_ALEF characters
	//   font                - 'me_quran' selects the font-specific substitution
	// Returns the revised text.
	var options = args || {};
	var result = text;

	if (!options.showSigns)
	{
		result = this.pregReplace('[$HIGH_SALA-$RUB_EL_HIZB$SAJDAH]', '', result);
	}
	else
	{
		var internalSignMarkup = options.ignoreInternalSigns ? '' : '<span class="internal-sign">$1</span>';
		result = this.pregReplace(' ([$HIGH_SALA-$HIGH_SEEN])', '<span class="sign">&nbsp;$1</span>', result);
		result = this.pregReplace('([$RUB_EL_HIZB$SAJDAH])', internalSignMarkup, result);
	}

	if (!options.showSmallAlef)
	{
		result = this.pregReplace('$SUPERSCRIPT_ALEF', '', result);
	}

	if (options.font == 'me_quran')
	{
		//result = this.addSpaceTatweel(result);
		result = this.pregReplace('$FATHA$SUPERSCRIPT_ALEF', '$FATHA$SMALL_ALEF', result);
	}
	else
	{
		result = this.removeExtraMeems(result);
	}

	// always fold ALEF + MADDA into the single ALEF_WITH_MADDA_ABOVE symbol
	return this.pregReplace('$ALEF$MADDA', '$ALEF_WITH_MADDA_ABOVE', result);
};





// add space/tatweel before small-alef

TextTools.prototype.addSpaceTatweel = function(text)
{
	// Step 1 inserts a TATWEEL between every FATHA and a following
	// SUPERSCRIPT_ALEF. Step 2 takes it out again where the letter carrying
	// the FATHA is HAMZA, in the DAL-ZAIN range, or WAW.
	// (An earlier variant of step 2 used '$1 $2', i.e. a space instead.)
	var steps = [
		['($FATHA)($SUPERSCRIPT_ALEF)', '$1$TATWEEL$2'],
		['([$HAMZA$DAL-$ZAIN$WAW]$FATHA)$TATWEEL($SUPERSCRIPT_ALEF)', '$1$2']
	];
	var result = text;

	for (var k = 0; k < steps.length; k++)
	{
		result = this.pregReplace(steps[k][0], steps[k][1], result);
	}

	return result;
};





// remove extra meems in Uthmani text

TextTools.prototype.removeExtraMeems = function(text)
{
	// Drops a LOW_MEEM that directly follows FATHATAN or DAMMATAN, and a
	// HIGH_MEEM that directly follows KASRATAN; the tanween mark itself stays.
	var steps = [
		['([$FATHATAN$DAMMATAN])$LOW_MEEM', '$1'],
		['($KASRATAN)$HIGH_MEEM', '$1']
	];
	var result = text;

	for (var k = 0; k < steps.length; k++)
	{
		result = this.pregReplace(steps[k][0], steps[k][1], result);
	}

	return result;
};


TextTools.prototype.addSilentLetters  = function(text)
{
	// Inserts the placeholder letters W, A, Y and O into a search pattern at
	// positions where a silent letter may occur. The matchingRules table later
	// expands these placeholders into optional letters ("$WAW{0,1}" etc.).
	//
	// text : search pattern as typed by the user
	// Returns the pattern with placeholders inserted.
	//
	// The character classes below list their members without "|": inside
	// [...] a "|" is a literal pipe, which would make a rule fire across the
	// "|" OR operator of a search pattern and corrupt it.

	//Silent WAW
	text = this.pregReplace('([$ALEF])([$MARBUTA$HEH])', 'W$1$2', text);

	//Silent Alef in MEA'AH
	text = this.pregReplace('([$MEEM])([$ALEF_WITH_HAMZA_BELOW$YEH_WITH_HAMZA])([$MARBUTA$HEH$TEH])', '$1A$2$3', text);

	//Silent Alef in JEEA'A
	// NOTE(review): "$JEMM" is used here and in the last rule - confirm that
	// UChars defines this key; an undefined symbol expands to an empty string.
	text = this.pregReplace('([$JEMM])([$YEH][$YEH_WITH_HAMZA])', '$1A$2', text);

	//Silent YEH in AFAA'EN
	text = this.pregReplace('([$FEH])([$ALEF$ALEF_WITH_HAMZA_BELOW])([$NOON])', '$1$2Y$3', text);

	//Silent YEH in BEA'YOKOM
	text = this.pregReplace('([$ALEF$ALEF_WITH_HAMZA_ABOVE][$YEH])([$KAF][$MEEM])', '$1Y$2', text);

	//Silent YEH in A'AYD
	text = this.pregReplace('([$ALEF$ALEF_WITH_HAMZA_ABOVE][$YEH])([$DAL])', '$1Y$2', text);

	//MAGRAAHA case: "O" is expanded by matchingRules to "$MAGRAAHA{0,1}"
	text = this.pregReplace('($MEEM$JEMM$REH)', '$1O', text);

	return text;
};





// highlight a pattern in a string

TextTools.prototype.highlight = function(pattern, str)
{
	// Wraps every word of str that contains a match of pattern (a regexp
	// source string) in <span class="highlight">...</span>. The characters
	// ◄ and ► serve as temporary open/close markers while the span boundaries
	// are being adjusted.
	var matcher = new RegExp('('+ pattern+ ')', 'g');
	var marked = str.replace(matcher, '◄$1►');

	// a whitespace character just inside a marker moves outside it
	// (it is rewritten as a plain space)
	marked = marked.replace(/◄\s/g, ' ◄');
	marked = marked.replace(/\s►/g, '► ');

	// grow each marked region to the surrounding non-whitespace run
	marked = marked.replace(/([^\s]*)◄/g, '◄$1');
	marked = marked.replace(/►([^\s]*)/g, '$1►');

	// several markers within one non-whitespace run collapse to a single pair
	while (/◄[^\s]*◄/.test(marked))
	{
		marked = marked.replace(/(◄[^\s]*)◄/g, '$1');
		marked = marked.replace(/►([^\s]*►)/g, '$1');
	}

	return marked.replace(/◄/g, '<span class="highlight">').replace(/►/g, '</span>');
};



//---------------------- Pattern Modification -----------------------



// enrich arabic search pattern 

TextTools.prototype.enrichPattern = function(pattern, ignoreHaraka) 
{
	// Turns a user-typed Arabic search pattern into the regexp source used for
	// searching. The steps run in a fixed order, each on the previous result.
	//
	// pattern      : search pattern as typed
	// ignoreHaraka : when truthy, $HARAKA characters typed in the pattern are removed
	// Returns the enriched pattern string.

	// placeholders for possibly silent letters
	var enriched = this.addSilentLetters(pattern);

	if (ignoreHaraka)
	{
		enriched = this.pregReplace("$HARAKA", '', enriched);
	}

	// YEH+ALEF (optionally preceded by WAW) that follows whitespace, a double
	// quote or the start of the pattern and is followed by whitespace: only
	// the letters are kept, the delimiter before and the whitespace after go
	enriched = this.pregReplace('(\\s|"|^)($WAW)?($YEH$ALEF)\\s', '$2$3', enriched);

	// allows using letter constants in pattern
	enriched = this.regTrans(enriched);

	enriched = this.handleSpaces(enriched);
	enriched = this.applyRules(this.preProcess, enriched);
	enriched = this.applyRules(this.wildcardRegs, enriched);

	// add haraka between letters
	enriched = this.pregReplace("(.)", "$1$HARAKA*", enriched);

	enriched = this.applyRules(this.matchingRules, enriched);
	enriched = this.applyRules(this.wildcards, enriched);

	return enriched;
};



// handle spaces in the search pattern 

TextTools.prototype.handleSpaces = function(pattern) 
{
	// Rewrites the spaces and quotes of a search pattern into the operators
	// used by search(): spaces outside double quotes become "+", underscores
	// become spaces, and the double quotes themselves become "+".
	if (pattern == '') return pattern;

	var result = pattern
		// pad the content of the first quoted segment with a space on each side
		.replace(/^([^"]*")([^"]*)("*[^"\s]*)/g, '$1 $2 $3')
		// every whitespace run becomes one plain space
		.replace(/\s+/g, ' ');

	// Each pass turns one space that is preceded by an even number of double
	// quotes (i.e. lies outside quoted segments) into "+"; repeat until a pass
	// changes nothing.
	var before;
	do
	{
		before = result;
		//result = result.replace(/^([^"]*)("[^"]*")*([^"\s]*) /g, '$1$2$3+');
		result = result.replace(/^(([^"]*"[^"]*")*)([^"\s]*) /g, '$1$3+');
	}
	while (result != before);

	return result
		.replace(/_/g, ' ')
		.replace(/"/g, '+')
		// remove extra operators: leading + |, trailing + | !
		.replace(/^[+|]+/g, '')
		.replace(/[+|!]+$/g, '')
		// "+" runs around an operator collapse to that operator
		.replace(/\+*([+|!])\+*/g, '$1');
};



//---------------------- Search Functions -----------------------





// search text for a mixed pattern

TextTools.prototype.search = function(text, pattern, range) 
{
	// Searches text for a mixed pattern whose parts are joined by "+".
	// The first part is searched with searchArray() over range; every further
	// part filters the surviving line indices with searchBucket(), so a line
	// is kept only if it matches all parts.
	//
	// text    : array of lines
	// pattern : regexp sources joined by "+"
	// range   : optional {from, to} passed on to searchArray()
	// Returns {results, totalMatch}: results is the array of matching line
	// indices; totalMatch is searchArray()'s match count for a single-part
	// pattern and the number of surviving lines otherwise.

	// declared locally: assigning these without "var" created globals and
	// throws a ReferenceError in strict mode
	var patterns = pattern.split('+');
	var res = this.searchArray(text, patterns[0], range);
	var bucket = res.results;
	var total = res.totalMatch;

	for (var i = 1; i < patterns.length; i++)
	{
		bucket = this.searchBucket(text, patterns[i], bucket);
		total = bucket.length;
	}

	return {results: bucket, totalMatch: total};
};





// search text for a pattern in a range

TextTools.prototype.searchArray = function(text, pattern, range) 
{
	// Searches the lines text[range.from] .. text[range.to - 1] for pattern
	// (a regexp source string); range defaults to the whole array. Each line
	// is padded with one space on either side before matching.
	// Returns {results, totalMatch}: the indices of the lines with at least
	// one match, and the number of matches summed over those lines.
	var span = range || {from: 0, to: text.length};
	var matcher = new RegExp(pattern, 'g');
	var hits = [];
	var matchCount = 0;

	for (var index = span.from; index < span.to; index++)
	{
		var found = (' '+ text[index]+ ' ').match(matcher);
		if (!found) continue;

		matchCount += found.length;
		hits.push(index);
	}

	return {results: hits, totalMatch: matchCount};
};





// search text for a pattern in a bucket

TextTools.prototype.searchBucket = function(text, pattern, bucket) 
{
	// Filters bucket (an array of line indices into text) down to the lines
	// that match pattern (a regexp source string). Each line is padded with
	// one space on either side before matching.
	// Returns a new array with the surviving indices, in their original order.
	var matcher = new RegExp(pattern, 'g');
	var kept = [];

	for (var pos = 0; pos < bucket.length; pos++)
	{
		var lineIndex = bucket[pos];
		var padded = ' '+ text[lineIndex]+ ' ';

		if (padded.match(matcher))
		{
			kept.push(lineIndex);
		}
	}

	return kept;
};



