1714 lines
		
	
	
		
			64 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			1714 lines
		
	
	
		
			64 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| 
 | |
| 
 | |
| class INDIC {
 | |
| 
 | |
/*
 * indic_category: shaping category of a character.
 * Ported from hb-ot-shape-complex-indic-private.hh.
 *
 * Each value is also the index of the corresponding letter in
 * self::$indic_category_char.
 */
const OT_X            = 0;
const OT_C            = 1;
const OT_V            = 2;
const OT_N            = 3;
const OT_H            = 4;
const OT_ZWNJ         = 5;
const OT_ZWJ          = 6;
const OT_M            = 7;	/* Matra or Dependent Vowel */
const OT_SM           = 8;
const OT_VD           = 9;
const OT_A            = 10;
const OT_NBSP         = 11;
const OT_DOTTEDCIRCLE = 12;	/* Not in the spec, but special in Uniscribe. /Very very/ special! */
const OT_RS           = 13;	/* Register Shifter, used in Khmer OT spec */
const OT_Coeng        = 14;
const OT_Repha        = 15;
const OT_Ra           = 16;	/* Not explicitly listed in the OT spec, but used in the grammar. */
const OT_CM           = 17;
 | |
| 
 | |
| 
 | |
/*
 * One letter per indic_category value, indexed by the OT_* constant
 * (e.g. self::OT_C => 'C'). These letters make up the string in which
 * syllables are searched for.
 * Categories come from hb-ot-shape-complex-indic-private.hh.
 */
public static $indic_category_char = array(
	'x',	/*  0 OT_X */
	'C',	/*  1 OT_C */
	'V',	/*  2 OT_V */
	'N',	/*  3 OT_N */
	'H',	/*  4 OT_H */
	'Z',	/*  5 OT_ZWNJ */
	'J',	/*  6 OT_ZWJ */
	'M',	/*  7 OT_M */
	'S',	/*  8 OT_SM */
	'v',	/*  9 OT_VD */
	'A',	/* 10 OT_A - the spec gives Anudatta U+0952 as OT_A, but testing shows that
		 * Uniscribe treats it as OT_VD; see set_indic_properties */
	's',	/* 11 OT_NBSP */
	'D',	/* 12 OT_DOTTEDCIRCLE */
	'F',	/* 13 OT_RS - register shifter, Khmer only */
	'G',	/* 14 OT_Coeng - Khmer only */
	'r',	/* 15 OT_Repha - 0D4E (dot reph), the only one, in Malayalam */
	'R',	/* 16 OT_Ra */
	'm',	/* 17 OT_CM - consonant medial; in Indic only 0A75 in Gurmukhi (0A00..0A7F),
		 * also in Lao, Myanmar, Tai Tham, Javanese & Cham */
);
 | |
| 
 | |
| 
 | |
/*
 * indic_position: visual positions in a syllable, from left to right.
 * Ported from hb-ot-shape-complex-indic-private.hh.
 */
const POS_START             = 0;

const POS_RA_TO_BECOME_REPH = 1;
const POS_PRE_M             = 2;
const POS_PRE_C             = 3;

const POS_BASE_C            = 4;
const POS_AFTER_MAIN        = 5;

const POS_ABOVE_C           = 6;

const POS_BEFORE_SUB        = 7;
const POS_BELOW_C           = 8;
const POS_AFTER_SUB         = 9;

const POS_BEFORE_POST       = 10;
const POS_POST_C            = 11;
const POS_AFTER_POST        = 12;

const POS_FINAL_C           = 13;
const POS_SMVD              = 14;

const POS_END               = 15;
 | |
| 
 | |
/*
 * Basic features, applied in order, one at a time, after initial_reordering.
 *
 * The values must stay in the same order as the indic_features array.
 * Names with a leading underscore are F_GLOBAL; names without it are applied
 * only where the glyph mask says so.
 */
const _NUKT = 0;
const _AKHN = 1;
const RPHF  = 2;
const _RKRF = 3;
const PREF  = 4;
const BLWF  = 5;
const HALF  = 6;
const ABVF  = 7;
const PSTF  = 8;
const CFAR  = 9;	// Khmer only
const _VATU = 10;
const _CJCT = 11;
const INIT  = 12;
 | |
| 
 | |
| 
 | |
/**
 * Work out the Indic shaping category and syllable position of one glyph.
 *
 * The packed value from indic_get_categories() holds the category in its low
 * 7 bits and the position from bit 8 upwards. Both are then adjusted by the
 * character- and script-specific overrides below (their order matters) and
 * written to $info['indic_category'] and $info['indic_position'].
 *
 * @param array $info        Glyph record. Reads 'uni' (and 'general_category' when the
 *                           category is OT_Repha); writes 'indic_category' and 'indic_position'.
 * @param mixed $scriptblock Script identifier; only compared with UCDN::SCRIPT_KHMER here.
 */
public static function set_indic_properties(&$info, $scriptblock ) {
	$codepoint = $info['uni'];
	$packed = self::indic_get_categories($codepoint);
	$category = $packed & 0x7F;
	$position = $packed >> 8;

	/*
	 * Category overrides.
	 */

	if ($codepoint == 0x17D1) {
		$category = self::OT_X;
	}

	/* Khmer various signs: these are like top matras. */
	if ($category == self::OT_X && self::in_range($codepoint, 0x17CB, 0x17D3)) {
		$category = self::OT_M;
		$position = self::POS_ABOVE_C;
	}

	/* Khmer Bindu doesn't like to be repositioned. */
	if ($codepoint == 0x17C6) {
		$category = self::OT_N;
	}

	/* Khmer coeng. */
	if ($codepoint == 0x17D2) {
		$category = self::OT_Coeng;
	}

	/* The spec says U+0952 is OT_A, but testing shows that Uniscribe treats
	 * these Vedic marks as OT_VD. The whole range U+0951..U+0954 is mapped here.
	 * TESTS:
	 *   U+092E,U+0947,U+0952
	 *   U+092E,U+0952,U+0947
	 *   U+092E,U+0947,U+0951
	 *   U+092E,U+0951,U+0947
	 */
	if (self::in_range($codepoint, 0x0951, 0x0954)) {
		$category = self::OT_VD;
	}

	switch ($codepoint) {
		case 0x200C:
			$category = self::OT_ZWNJ;
			break;
		case 0x200D:
			$category = self::OT_ZWJ;
			break;
		case 0x25CC:
			$category = self::OT_DOTTEDCIRCLE;
			break;
		case 0x0A71:
			/* GURMUKHI ADDAK. More like consonant medial, like 0A75. */
			$category = self::OT_SM;
			break;
	}

	/* Two kinds of characters are marked as Repha:
	 * - GenCat=Mn ones are already positioned visually, i.e. after the base (e.g. Khmer);
	 * - GenCat=Lo ones are encoded logically, i.e. at the beginning of the syllable (e.g. Malayalam).
	 * The first kind is recategorized to look like a Nukta, attached directly to the base.
	 */
	if ($category == self::OT_Repha
		&& $info['general_category'] == UCDN::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK) {
		$category = self::OT_N;
	}

	/*
	 * Position overrides.
	 */

	/* Same set as CONSONANT_FLAGS / is_consonant. */
	if (self::FLAG($category) & (
			self::FLAG(self::OT_C)
			| self::FLAG(self::OT_CM)
			| self::FLAG(self::OT_Ra)
			| self::FLAG(self::OT_V)
			| self::FLAG(self::OT_NBSP)
			| self::FLAG(self::OT_DOTTEDCIRCLE)
		)) {
		/* Khmer differs from Indic here; for the others the position is
		 * recategorized later based on font lookups. */
		$position = ($scriptblock == UCDN::SCRIPT_KHMER) ? self::POS_BELOW_C : self::POS_BASE_C;

		if (self::is_ra($codepoint)) {
			$category = self::OT_Ra;
		}
	} elseif ($category == self::OT_M) {
		$position = self::matra_position($codepoint, $position);
	} elseif ($category == self::OT_SM || $category == self::OT_VD) {
		$position = self::POS_SMVD;
	}

	/* Oriya Bindu is BeforeSub in the spec. */
	if ($codepoint == 0x0B01) {
		$position = self::POS_BEFORE_SUB;
	}

	$info['indic_category'] = $category;
	$info['indic_position'] = $position;
}
 | |
| 
 | |
/*
 * syllable_type: kind of cluster a glyph belongs to.
 * Stored in the low 4 bits of a glyph's 'syllable' value.
 */
const CONSONANT_SYLLABLE = 0;
const VOWEL_SYLLABLE     = 1;
const STANDALONE_CLUSTER = 2;
const BROKEN_CLUSTER     = 3;
const NON_INDIC_CLUSTER  = 4;
 | |
| 
 | |
/**
 * Split a run of glyphs into syllables and tag every glyph with its syllable.
 *
 * $s holds one category letter (see self::$indic_category_char) per glyph of $o.
 * Starting at each unconsumed offset the patterns are tried in this order:
 * consonant syllable, vowel syllable, stand-alone cluster (word start only),
 * broken cluster. If none matches, a single glyph is consumed as a
 * non-Indic cluster. Each glyph receives
 *   $o[i]['syllable'] = (serial << 4) | syllable_type
 * where serial cycles through 1..15.
 *
 * The patterns follow the OT spec; HarfBuzz's alternative grammar is not used.
 *
 * @param array  $o                Glyph records; 'syllable' is written, 'general_category' is read.
 * @param string $s                Category letters, one per entry of $o.
 * @param bool   $broken_syllables Set to true when at least one broken cluster was found.
 */
public static function set_syllables(&$o, $s, &$broken_syllables) {
	$broken_syllables = false;
	$total = strlen($s);
	$serial = 1;
	$offset = 0;

	while ($offset < $total) {
		$rest = substr($s, $offset);
		$length = 1;
		$type = self::NON_INDIC_CLUSTER;

		if (preg_match('/^([CR]m*[N]?(H[ZJ]?|[ZJ]H))*[CR]m*[N]?[A]?(H[ZJ]?|[M]*[N]?[H]?)?[S]?[v]{0,2}/', $rest, $found)) {
			// Consonant syllable
			$length = strlen($found[0]);
			$type = self::CONSONANT_SYLLABLE;
		}
		elseif (preg_match('/^(RH|r)?V[N]?([ZJ]?H[CR]m*|J[CR]m*)?([M]*[N]?[H]?)?[S]?[v]{0,2}/', $rest, $found)) {
			// Vowel-based syllable
			$length = strlen($found[0]);
			$type = self::VOWEL_SYLLABLE;
		}
		elseif (
			// Stand-alone cluster: applies only at a word start, i.e. at the very
			// beginning or after a glyph whose general category lies outside the
			// LOWERCASE_LETTER..NON_SPACING_MARK range.
			($offset == 0
				|| $o[$offset - 1]['general_category'] < UCDN::UNICODE_GENERAL_CATEGORY_LOWERCASE_LETTER
				|| $o[$offset - 1]['general_category'] > UCDN::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
			&& preg_match('/^(RH|r)?[sD][N]?([ZJ]?H[CR]m*)?([M]*[N]?[H]?)?[S]?[v]{0,2}/', $rest, $found)
		) {
			$length = strlen($found[0]);
			$type = self::STANDALONE_CLUSTER;
		}
		elseif (preg_match('/^(RH|r)?[N]?([ZJ]?H[CR])?([M]*[N]?[H]?)?[S]?[v]{0,2}/', $rest, $found)
			&& strlen($found[0])) {
			// Broken cluster; every part of the pattern is optional, so an
			// empty match is ignored.
			$length = strlen($found[0]);
			$type = self::BROKEN_CLUSTER;
			$broken_syllables = true;
		}

		$stop = $offset + $length;
		for ($k = $offset; $k < $stop; $k++) {
			$o[$k]['syllable'] = ($serial << 4) | $type;
		}
		$offset = $stop;

		// The serial number wraps so that it stays within 1..15.
		if (++$serial == 16) {
			$serial = 1;
		}
	}
}
 | |
| 
 | |
| 
 | |
/**
 * Sinhala variant of set_syllables().
 *
 * Only consonant syllables and vowel syllables are recognised; anything else
 * is consumed one glyph at a time as a non-Indic cluster. Each glyph receives
 *   $o[i]['syllable'] = (serial << 4) | syllable_type
 * where serial cycles through 1..15.
 *
 * @param array  $o                Glyph records; 'syllable' is written.
 * @param string $s                Category letters, one per entry of $o.
 * @param bool   $broken_syllables Always set to false: there is no broken-cluster pattern here.
 */
public static function set_syllables_sinhala(&$o, $s, &$broken_syllables) {
	$broken_syllables = false;
	$total = strlen($s);
	$serial = 1;
	$offset = 0;

	while ($offset < $total) {
		$rest = substr($s, $offset);
		$length = 1;
		$type = self::NON_INDIC_CLUSTER;

		if (preg_match('/^([CR]HJ|[CR]JH){0,8}[CR][HM]{0,3}[S]{0,1}/', $rest, $found)) {
			// Consonant syllable
			$length = strlen($found[0]);
			$type = self::CONSONANT_SYLLABLE;
		}
		elseif (preg_match('/^V[S]{0,1}/', $rest, $found)) {
			// Vowel-based syllable
			$length = strlen($found[0]);
			$type = self::VOWEL_SYLLABLE;
		}

		$stop = $offset + $length;
		for ($k = $offset; $k < $stop; $k++) {
			$o[$k]['syllable'] = ($serial << 4) | $type;
		}
		$offset = $stop;

		// The serial number wraps so that it stays within 1..15.
		if (++$serial == 16) {
			$serial = 1;
		}
	}
}
 | |
| 
 | |
/**
 * Khmer variant of set_syllables().
 *
 * Patterns are tried in the order consonant syllable, vowel syllable, broken
 * cluster; there is no stand-alone cluster pattern. Unmatched glyphs are
 * consumed one at a time as non-Indic clusters. Each glyph receives
 *   $o[i]['syllable'] = (serial << 4) | syllable_type
 * where serial cycles through 1..15.
 *
 * @param array  $o                Glyph records; 'syllable' is written.
 * @param string $s                Category letters, one per entry of $o.
 * @param bool   $broken_syllables Set to true when at least one broken cluster was found.
 */
public static function set_syllables_khmer(&$o, $s, &$broken_syllables) {
	$broken_syllables = false;
	$total = strlen($s);
	$serial = 1;
	$offset = 0;

	while ($offset < $total) {
		$rest = substr($s, $offset);
		$length = 1;
		$type = self::NON_INDIC_CLUSTER;

		if (preg_match('/^r?([CR]J?((Z?F)?[N]{0,2})?[ZJ]?G(JN?)?){0,4}[CR]J?((Z?F)?[N]{0,2})?A?((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})?(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2}/', $rest, $found)) {
			// Consonant syllable
			$length = strlen($found[0]);
			$type = self::CONSONANT_SYLLABLE;
		}
		elseif (preg_match('/^(RH|r)?V((Z?F)?[N]{0,2})?(J|([ZJ]?G(JN?)?[CR]J?((Z?F)?[N]{0,2})?){0,4}((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})?(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2})/', $rest, $found)) {
			// Vowel-based syllable
			$length = strlen($found[0]);
			$type = self::VOWEL_SYLLABLE;
		}
		elseif (preg_match('/^(RH|r)?((Z?F)?[N]{0,2})?(([ZJ]?G(JN?)?)[CR]J?((Z?F)?[N]{0,2})?){0,4}((([ZJ]?G(JN?)?)|GZ)|(GJ)?([ZJ]{0,3}MN?(H|JHJR)?){0,4})(G([CR]J?((Z?F)?[N]{0,2})?|V))?(SZ?)?[v]{0,2}/', $rest, $found)
			&& strlen($found[0])) {
			// Broken cluster; the pattern may match the empty string, which is ignored.
			$length = strlen($found[0]);
			$type = self::BROKEN_CLUSTER;
			$broken_syllables = true;
		}

		$stop = $offset + $length;
		for ($k = $offset; $k < $stop; $k++) {
			$o[$k]['syllable'] = ($serial << 4) | $type;
		}
		$offset = $stop;

		// The serial number wraps so that it stays within 1..15.
		if (++$serial == 16) {
			$serial = 1;
		}
	}
}
 | |
| 
 | |
/**
 * Run the initial reordering pass over every syllable of a glyph run.
 *
 * Consonant positions are refined first, then (when broken clusters were
 * reported and a dotted-circle glyph is supplied) dotted circles are inserted.
 * The run is then cut wherever the 'syllable' value changes and each piece is
 * handed to initial_reordering_syllable() as a half-open range [start, end).
 *
 * @param array $info             Glyph records, modified in place.
 * @param array $GSUBdata         Font substitution data, passed through to the helpers.
 * @param bool  $broken_syllables Whether syllable detection reported a broken cluster.
 * @param array $indic_config     Script configuration, passed through.
 * @param mixed $scriptblock      Script identifier, passed through.
 * @param bool  $is_old_spec      Old-style script tag flag, passed through.
 * @param mixed $dottedcircle     Dotted-circle glyph record(s) to insert; falsy to disable.
 */
public static function initial_reordering(&$info, $GSUBdata, $broken_syllables, $indic_config, $scriptblock, $is_old_spec, $dottedcircle) {

	self::update_consonant_positions($info, $GSUBdata);

	if ($broken_syllables && $dottedcircle) {
		self::insert_dotted_circles($info, $dottedcircle);
	}

	// Counted only now: inserting dotted circles may have lengthened the run.
	$total = count($info);
	if (!$total) {
		return;
	}

	$run_start = 0;
	$run_syllable = $info[0]['syllable'];
	for ($i = 1; $i < $total; $i++) {
		if ($run_syllable == $info[$i]['syllable']) {
			continue;
		}
		// A new syllable starts at $i: process the one that has just ended.
		self::initial_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $run_start, $i);
		$run_start = $i;
		$run_syllable = $info[$run_start]['syllable'];
	}

	// Last syllable of the run.
	self::initial_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $run_start, $total);
}
 | |
| 
 | |
/**
 * Refine the position of glyphs currently marked POS_BASE_C using the font's
 * substitution data.
 *
 * A glyph whose code point is listed under 'pref', 'blwf' or 'pstf' in
 * $GSUBdata (checked in that order, first hit wins) is moved to POS_POST_C,
 * POS_BELOW_C or POS_POST_C respectively. Other glyphs are left untouched.
 *
 * @param array $info     Glyph records; 'indic_position' and 'uni' are read,
 *                        'indic_position' may be rewritten.
 * @param array $GSUBdata Per-feature lookup tables keyed by code point.
 */
public static function update_consonant_positions(&$info, $GSUBdata) {
	// Feature => resulting position, in order of precedence.
	$feature_positions = array(
		'pref' => self::POS_POST_C,
		'blwf' => self::POS_BELOW_C,
		'pstf' => self::POS_POST_C,
	);

	$total = count($info);
	for ($idx = 0; $idx < $total; $idx++) {
		if ($info[$idx]['indic_position'] != self::POS_BASE_C) {
			continue;
		}
		$codepoint = $info[$idx]['uni'];
		foreach ($feature_positions as $feature => $new_position) {
			// If would substitute...
			if (isset($GSUBdata[$feature][$codepoint])) {
				$info[$idx]['indic_position'] = $new_position;
				break;
			}
		}
	}
}
 | |
| 
 | |
/**
 * Insert a dotted-circle glyph into every broken cluster.
 *
 * For the first glyph of each syllable whose type (low 4 bits of 'syllable')
 * is BROKEN_CLUSTER, the dotted circle is tagged with that syllable value and
 * spliced in after any leading OT_Repha glyphs of the same syllable.
 *
 * The former trailing `die()` guard has been removed. The loop below only
 * terminates once $idx has reached count($info), and array_splice() renumbers
 * the list, so for a zero-based list $info[$idx] can never be set at that
 * point. A shaping routine must not terminate the whole process in any case.
 *
 * @param array $info         Zero-based list of glyph records, modified in place.
 *                            'syllable' and 'indic_category' are read.
 * @param array $dottedcircle Glyph record(s) to insert. Element 0 receives the
 *                            'syllable' value of the cluster it is inserted into.
 */
public static function insert_dotted_circles(&$info, $dottedcircle) {
	$idx = 0;
	$last_syllable = 0;
	$count = count($info);

	while ($idx < $count) {
		$syllable = $info[$idx]['syllable'];
		$syllable_type = ($syllable & 0x0F);

		if ($last_syllable == $syllable || $syllable_type != self::BROKEN_CLUSTER) {
			// Not the start of a broken cluster (or already handled).
			$idx++;
			continue;
		}

		$last_syllable = $syllable;
		$dottedcircle[0]['syllable'] = $syllable;

		/* Insert dottedcircle after possible Repha. */
		while ($idx < $count
			&& $last_syllable == $info[$idx]['syllable']
			&& $info[$idx]['indic_category'] == self::OT_Repha) {
			$idx++;
		}
		array_splice($info, $idx, 0, $dottedcircle);

		// The list has grown. $idx now points at the inserted glyph, which
		// carries the same syllable value and is skipped on the next pass.
		$count = count($info);
	}
}
 | |
| 
 | |
| 
 | |
| 
 | |
| /* Rules from:
 | |
|  * https://www.microsoft.com/typography/otfntdev/devanot/shaping.aspx */
 | |
| 
 | |
| public static function initial_reordering_syllable (&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $start, $end) {
 | |
| 	/* vowel_syllable: We made the vowels look like consonants. So uses the consonant logic! */
 | |
| 	/* broken_cluster: We already inserted dotted-circles, so just call the standalone_cluster. */
 | |
| 	/* standalone_cluster: We treat NBSP/dotted-circle as if they are consonants, so we should just chain. */
 | |
| 
 | |
| 	$syllable_type = ($info[$start]['syllable'] & 0x0F);
 | |
| 	if ($syllable_type==self::NON_INDIC_CLUSTER ) { return; }
 | |
| 	if ($syllable_type==self::BROKEN_CLUSTER  || $syllable_type==self::STANDALONE_CLUSTER ) { 
 | |
| 		//if ($uniscribe_bug_compatible) {
 | |
| 		/* For dotted-circle, this is what Uniscribe does:
 | |
| 		* If dotted-circle is the last glyph, it just does nothing.
 | |
| 		* i.e. It doesn't form Reph. */
 | |
| 		if ($info[$end - 1]['indic_category'] == self::OT_DOTTEDCIRCLE) {
 | |
| 			return;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* 1. Find base consonant:
 | |
| 	*
 | |
| 	* The shaping engine finds the base consonant of the syllable, using the
 | |
| 	* following algorithm: starting from the end of the syllable, move backwards
 | |
| 	* until a consonant is found that does not have a below-base or post-base
 | |
| 	* form (post-base forms have to follow below-base forms), or that is not a
 | |
| 	* pre-base reordering Ra, or arrive at the first consonant. The consonant
 | |
| 	* stopped at will be the base.
 | |
| 	*
 | |
| 	*	o If the syllable starts with Ra + Halant (in a script that has Reph)
 | |
| 	*	and has more than one consonant, Ra is excluded from candidates for
 | |
| 	*	base consonants.
 | |
| 	*/
 | |
| 
 | |
| 	$base = $end;
 | |
| 	$has_reph = false;
 | |
| 	$limit = $start;
 | |
| 
 | |
| 	if ($scriptblock != UCDN::SCRIPT_KHMER) { 
 | |
| 		/* -> If the syllable starts with Ra + Halant (in a script that has Reph)
 | |
| 		*	and has more than one consonant, Ra is excluded from candidates for
 | |
| 		*	base consonants. */
 | |
| 		if (count($GSUBdata['rphf']) /* ?? $indic_plan->mask_array[RPHF] */ && $start + 3 <= $end &&
 | |
| 			(
 | |
| 			($indic_config[4] == self::REPH_MODE_IMPLICIT && !self::is_joiner($info[$start + 2])) ||
 | |
| 			($indic_config[4] == self::REPH_MODE_EXPLICIT && $info[$start + 2]['indic_category'] == self::OT_ZWJ)
 | |
| 			)) {
 | |
| 			/* See if it matches the 'rphf' feature. */
 | |
| 			//$glyphs = array($info[$start]['uni'], $info[$start + 1]['uni']);
 | |
| 			//if ($indic_plan->rphf->would_substitute ($glyphs, count($glyphs), true, face)) {
 | |
| 			if (isset($GSUBdata['rphf'][$info[$start]['uni']]) && self::is_halant_or_coeng($info[$start + 1]) ) {
 | |
| 				$limit += 2;
 | |
| 				while ($limit < $end && self::is_joiner($info[$limit]))
 | |
| 					$limit++;
 | |
| 				$base = $start;
 | |
| 				$has_reph = true;
 | |
| 			}
 | |
| 		}
 | |
| 		else if ($indic_config[4] == self::REPH_MODE_LOG_REPHA && $info[$start]['indic_category'] == self::OT_Repha) {
 | |
| 			$limit += 1;
 | |
| 			while ($limit < $end && self::is_joiner($info[$limit]))
 | |
| 				$limit++;
 | |
| 			$base = $start;
 | |
| 			$has_reph = true;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	switch ($indic_config[2]) {	// base_pos
 | |
| 		case self::BASE_POS_LAST:
 | |
| 			/* -> starting from the end of the syllable, move backwards */
 | |
| 			$i = $end;
 | |
| 			$seen_below = false;
 | |
| 			do {
 | |
| 				$i--;
 | |
| 				/* -> until a consonant is found */
 | |
| 				if (self::is_consonant($info[$i])) {
 | |
| 					/* -> that does not have a below-base or post-base form
 | |
| 					* (post-base forms have to follow below-base forms), */
 | |
| 					if ($info[$i]['indic_position'] != self::POS_BELOW_C && ($info[$i]['indic_position'] != self::POS_POST_C || $seen_below)) {
 | |
| 						$base = $i;
 | |
| 						break;
 | |
| 					}
 | |
| 					if ($info[$i]['indic_position'] == self::POS_BELOW_C)
 | |
| 						$seen_below = true;
 | |
| 
 | |
| 					/* -> or that is not a pre-base reordering Ra,
 | |
| 					*
 | |
| 					* IMPLEMENTATION NOTES:
 | |
| 					*
 | |
| 					* Our pre-base reordering Ra's are marked POS_POST_C, so will be skipped
 | |
| 					* by the logic above already.
 | |
| 					*/
 | |
| 
 | |
| 					/* -> or arrive at the first consonant. The consonant stopped at will
 | |
| 					* be the base. */
 | |
| 					$base = $i;
 | |
| 				}
 | |
| 				else {
 | |
| 					/* A ZWJ after a Halant stops the base search, and requests an explicit
 | |
| 					* half form. 
 | |
| 					* [A ZWJ before a Halant, requests a subjoined form instead, and hence
 | |
| 					* search continues. This is particularly important for Bengali
 | |
| 					* sequence Ra,H,Ya that should form Ya-Phalaa by subjoining Ya] */
 | |
| 					if ($start < $i && $info[$i]['indic_category'] == self::OT_ZWJ && $info[$i - 1]['indic_category'] == self::OT_H) {
 | |
| 						if (!defined("OMIT_INDIC_FIX_1") || OMIT_INDIC_FIX_1!=1) { $base = $i; }	// INDIC_FIX_1
 | |
| 						break;
 | |
| 					}
 | |
| 					// ZKI8
 | |
| 					if ($start < $i && $info[$i]['indic_category'] == self::OT_ZWNJ) {
 | |
| 						break;
 | |
| 					}
 | |
| 				}
 | |
| 			} while ($i > $limit);
 | |
| 			break;
 | |
| 
 | |
| 		case self::BASE_POS_FIRST:
 | |
| 			/* In scripts without half forms (eg. Khmer), the first consonant is always the base. */
 | |
| 
 | |
| 				if (!$has_reph)
 | |
| 					$base = $limit;
 | |
| 
 | |
| 				/* Find the last base consonant that is not blocked by ZWJ.	If there is
 | |
| 				* a ZWJ right before a base consonant, that would request a subjoined form. */
 | |
| 				for ($i = $limit; $i < $end; $i++) {
 | |
| 					if (self::is_consonant($info[$i]) && $info[$i]['indic_position'] == self::POS_BASE_C) {
 | |
| 						if ($limit < $i && $info[$i - 1]['indic_category'] == self::OT_ZWJ)
 | |
| 							break;
 | |
| 						else
 | |
| 							$base = $i;
 | |
| 					}
 | |
| 				}
 | |
| 
 | |
| 			/* Mark all subsequent consonants as below. */
 | |
| 			for ($i = $base + 1; $i < $end; $i++) {
 | |
| 				if (self::is_consonant ($info[$i]) && $info[$i]['indic_position'] == self::POS_BASE_C)
 | |
| 					$info[$i]['indic_position'] = self::POS_BELOW_C;
 | |
| 			}
 | |
| 			break;
 | |
| 		//default:
 | |
| 			//assert (false);
 | |
| 			/* fallthrough */
 | |
| 	}
 | |
| 
 | |
| 	/* -> If the syllable starts with Ra + Halant (in a script that has Reph)
 | |
| 	*	and has more than one consonant, Ra is excluded from candidates for
 | |
| 	*	base consonants.
 | |
| 	*
 | |
| 	*	Only do this for unforced Reph (i.e. not for Ra,H,ZWJ). */
 | |
| 	if ($scriptblock != UCDN::SCRIPT_KHMER) { 
 | |
| 		if ($has_reph && $base == $start && $limit - $base <= 2) {
 | |
| 			/* Have no other consonant, so Reph is not formed and Ra becomes base. */
 | |
| 			$has_reph = false;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* 2. Decompose and reorder Matras:
 | |
| 	*
 | |
| 	* Each matra and any syllable modifier sign in the cluster are moved to the
 | |
| 	* appropriate position relative to the consonant(s) in the cluster. The
 | |
| 	* shaping engine decomposes two- or three-part matras into their constituent
 | |
| 	* parts before any repositioning. Matra characters are classified by which
 | |
| 	* consonant in a conjunct they have affinity for and are reordered to the
 | |
| 	* following positions:
 | |
| 	*
 | |
| 	*		o Before first half form in the syllable
 | |
| 	*		o After subjoined consonants
 | |
| 	*		o After post-form consonant
 | |
| 	*		o After main consonant (for above marks)
 | |
| 	*
 | |
| 	* IMPLEMENTATION NOTES:
 | |
| 	*
 | |
| 	* The normalize() routine has already decomposed matras for us, so we don't
 | |
| 	* need to worry about that.
 | |
| 	*/
 | |
| 
 | |
| 
 | |
| 	/* 3.	Reorder marks to canonical order:
 | |
| 	*
 | |
| 	* Adjacent nukta and halant or nukta and vedic sign are always repositioned
 | |
| 	* if necessary, so that the nukta is first.
 | |
| 	*
 | |
| 	* IMPLEMENTATION NOTES:
 | |
| 	*
 | |
| 	* Use the combining Class from Unicode categories? to bubble_sort.
 | |
| 	*/
 | |
| 
 | |
| 	/* Reorder characters */
 | |
| 
 | |
| 	for ($i = $start; $i < $base; $i++)
 | |
| 		$info[$i]['indic_position'] = min(self::POS_PRE_C, $info[$i]['indic_position']);
 | |
| 
 | |
| 	if ($base < $end)
 | |
| 		$info[$base]['indic_position'] = self::POS_BASE_C;
 | |
| 
 | |
| 	/* Mark final consonants. A final consonant is one appearing after a matra,
 | |
| 	* ? only in Khmer. */
 | |
| 	for ($i = $base + 1; $i < $end; $i++)
 | |
| 		if ($info[$i]['indic_category'] == self::OT_M) {
 | |
| 			for ($j = $i + 1; $j < $end; $j++)
 | |
| 				if (self::is_consonant ($info[$j])) {
 | |
| 					$info[$j]['indic_position'] = self::POS_FINAL_C;
 | |
| 					break;
 | |
| 				}
 | |
| 			break;
 | |
| 		}
 | |
| 
 | |
| 	/* Handle beginning Ra */
 | |
| 	if ($scriptblock != UCDN::SCRIPT_KHMER) { 
 | |
| 		if ($has_reph)
 | |
| 			$info[$start]['indic_position'] = self::POS_RA_TO_BECOME_REPH;
 | |
|    	}
 | |
| 
 | |
| 
 | |
| 	/* For old-style Indic script tags, move the first post-base Halant after
 | |
| 	* last consonant.	Only do this if there is *not* a Halant after last
 | |
| 	* consonant. Otherwise it becomes messy. */
 | |
| 	if ($is_old_spec) {
 | |
| 		for ($i = $base + 1; $i < $end; $i++) {
 | |
| 			if ($info[$i]['indic_category'] == self::OT_H) {
 | |
| 				for ($j = $end - 1; $j > $i; $j--) {
 | |
| 					if (self::is_consonant($info[$j]) || $info[$j]['indic_category'] == self::OT_H) { break; }
 | |
| 				}
 | |
| 				if ($info[$j]['indic_category'] != self::OT_H && $j > $i) {
 | |
| 					/* Move Halant to after last consonant. */
 | |
| 					self::_move_info_pos($info, $i, $j+1);
 | |
| 				}
 | |
| 				break;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* Attach misc marks to previous char to move with them. */
 | |
| 	$last_pos = self::POS_START;
 | |
| 	for ($i = $start; $i < $end; $i++) {
 | |
| 		if ((self::FLAG($info[$i]['indic_category']) & (self::FLAG(self::OT_ZWJ)| self::FLAG(self::OT_ZWNJ) | self::FLAG(self::OT_N) | self::FLAG (self::OT_RS) | self::FLAG (self::OT_H) | self::FLAG (self::OT_Coeng) ))) {
 | |
| 			$info[$i]['indic_position'] = $last_pos;
 | |
| 			if ($info[$i]['indic_category'] == self::OT_H && $info[$i]['indic_position'] == self::POS_PRE_M) {
 | |
| 				/*
 | |
| 				* Uniscribe doesn't move the Halant with Left Matra.
 | |
| 				* TEST: U+092B,U+093F,U+094DE
 | |
| 				* We follow.	This is important for the Sinhala
 | |
| 				* U+0DDA split matra since it decomposes to U+0DD9,U+0DCA
 | |
| 				* where U+0DD9 is a left matra and U+0DCA is the virama.
 | |
| 				* We don't want to move the virama with the left matra.
 | |
| 				* TEST: U+0D9A,U+0DDA
 | |
| 				*/
 | |
| 				for ($j = $i; $j > $start; $j--)
 | |
| 					if ($info[$j - 1]['indic_position'] != self::POS_PRE_M) {
 | |
| 						$info[$i]['indic_position'] = $info[$j - 1]['indic_position'];
 | |
| 						break;
 | |
| 					}
 | |
| 			}
 | |
| 		}
 | |
| 		else if ($info[$i]['indic_position'] != self::POS_SMVD) {
 | |
| 			$last_pos = $info[$i]['indic_position'];
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* Re-attach ZWJ, ZWNJ, and halant to next char, for after-base consonants. */
 | |
| 	$last_halant = $end;
 | |
| 	for ($i = $base + 1; $i < $end; $i++) {
 | |
| 		if (self::is_halant_or_coeng($info[$i]))
 | |
| 			$last_halant = $i;
 | |
| 		else if (self::is_consonant($info[$i])) {
 | |
| 			for ($j = $last_halant; $j < $i; $j++)
 | |
| 				if ($info[$j]['indic_position'] != self::POS_SMVD)
 | |
| 					$info[$j]['indic_position'] = $info[$i]['indic_position'];
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	if ($scriptblock == UCDN::SCRIPT_KHMER) { 
 | |
| 		/* KHMER_FIX_2 */
 | |
| 		/* Move Coeng+RO (Halant,Ra) sequence before base consonant. */
 | |
| 		for ($i = $base + 1; $i < $end; $i++) {
 | |
| 			if (self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni'])) {
 | |
| 				$info[$i]['indic_position'] = self::POS_PRE_C;
 | |
| 				$info[$i + 1]['indic_position'] = self::POS_PRE_C;
 | |
| 				break;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 
 | |
| /*
 | |
| if (!defined("OMIT_INDIC_FIX_2") || OMIT_INDIC_FIX_2 != 1) {
 | |
| 	// INDIC_FIX_2
 | |
| 	$ZWNJ_found = false;
 | |
| 	$POST_ZWNJ_c_found = false;
 | |
| 	for ($i = $base + 1; $i < $end; $i++) {
 | |
| 		if ($info[$i]['indic_category'] == self::OT_ZWNJ) { $ZWNJ_found = true; }
 | |
| 		else if ($ZWNJ_found && $info[$i]['indic_category'] == self::OT_C) { $POST_ZWNJ_c_found = true; }
 | |
| 		else if ($POST_ZWNJ_c_found && $info[$i]['indic_position'] == self::POS_BEFORE_SUB) { $info[$i]['indic_position'] = self::POS_AFTER_SUB; }
 | |
| 	}
 | |
| }
 | |
| */
 | |
| 
 | |
| 	/* Setup masks now */
 | |
| 	for ($i = $start; $i < $end; $i++) {
 | |
| 		$info[$i]['mask'] = 0;
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	if ($scriptblock == UCDN::SCRIPT_KHMER) { 
 | |
| 		/* Find a Coeng+RO (Halant,Ra) sequence and mark it for pre-base processing. */
 | |
| 		$mask = self::FLAG(self::PREF);
 | |
| 		for ($i = $base; $i < $end-1; $i++) {	/* KHMER_FIX_1 From $start (not base) */
 | |
| 			if (self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni'])  ) {
 | |
| 
 | |
| 				$info[$i]['mask'] |= self::FLAG(self::PREF);
 | |
| 				$info[$i + 1]['mask'] |= self::FLAG(self::PREF);
 | |
| 
 | |
| 				/* Mark the subsequent stuff with 'cfar'.  Used in Khmer.
 | |
| 				* Read the feature spec.
 | |
| 				* This allows distinguishing the following cases with MS Khmer fonts:
 | |
| 				* U+1784,U+17D2,U+179A,U+17D2,U+1782  [C+Coeng+RO+Coeng+C] => Should activate CFAR
 | |
| 				* U+1784,U+17D2,U+1782,U+17D2,U+179A  [C+Coeng+C+Coeng+RO] => Should NOT activate CFAR
 | |
| 				*/
 | |
| 				for ($j=($i+2); $j < $end; $j++)
 | |
| 					$info[$j]['mask'] |= self::FLAG(self::CFAR);
 | |
| 
 | |
| 				break;
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 
 | |
| 	/* Sit tight, rock 'n roll! */
 | |
| 	self::bubble_sort ($info, $start, $end - $start);
 | |
| 
 | |
| 	/* Find base again */
 | |
| 	$base = $end;
 | |
| 	for ($i = $start; $i < $end; $i++) {
 | |
| 		if ($info[$i]['indic_position'] == self::POS_BASE_C) {
 | |
| 			$base = $i;
 | |
| 			break;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if ($scriptblock != UCDN::SCRIPT_KHMER) { 
 | |
| 		/* Reph */
 | |
| 		for ($i = $start; $i < $end; $i++) {
 | |
| 			if ($info[$i]['indic_position'] == self::POS_RA_TO_BECOME_REPH) {
 | |
| 				$info[$i]['mask'] |= self::FLAG(self::RPHF);
 | |
| 			}
 | |
| 		}
 | |
| 
 | |
| 		/* Pre-base */
 | |
| 		$mask = self::FLAG(self::HALF);
 | |
| 		for ($i = $start; $i < $base; $i++) {
 | |
| 			$info[$i]['mask'] |= $mask;
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	/* Post-base */
 | |
| 	$mask = (self::FLAG(self::BLWF) | self::FLAG(self::ABVF) | self::FLAG(self::PSTF));
 | |
| 	for ($i = $base + 1; $i < $end; $i++) {
 | |
| 		$info[$i]['mask'] |= $mask;
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	if ($scriptblock != UCDN::SCRIPT_KHMER) { 
 | |
| 	if (!defined("OMIT_INDIC_FIX_3") || OMIT_INDIC_FIX_3 != 1) {
 | |
| 		/* INDIC_FIX_3 */
 | |
| 		/* Find a (pre-base) Consonant, Halant,Ra sequence and mark Halant|Ra for below-base BLWF processing. */
 | |
| 		// TEST CASE ক্র্ক in FreeSans versus Vrinda
 | |
| 		if (($base - $start) >= 3) {
 | |
| 			for ($i = $start; $i < ($base-2); $i++) {
 | |
| 				if (self::is_consonant($info[$i])) {
 | |
| 					if (self::is_halant_or_coeng($info[$i + 1]) && self::is_ra($info[$i + 2]['uni'])) {
 | |
| 						// If would substitute Halant+Ra...BLWF
 | |
| 						if (isset($GSUBdata['blwf'][$info[$i+2]['uni']])) { 
 | |
| 							$info[$i + 1]['mask'] |= self::FLAG(self::BLWF);
 | |
| 							$info[$i + 2]['mask'] |= self::FLAG(self::BLWF);
 | |
| 	 					}
 | |
| 						/* If would not substitute as blwf, mark Ra+Halant for RPHF using following Halant (if present) */
 | |
| 						else if (self::is_halant_or_coeng($info[$i + 3])) {
 | |
| 							$info[$i + 2]['mask'] |= self::FLAG(self::RPHF);
 | |
| 							$info[$i + 3]['mask'] |= self::FLAG(self::RPHF);
 | |
| 						}
 | |
| 						break;
 | |
| 					}
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 
 | |
| 	if ($is_old_spec && $scriptblock == UCDN::SCRIPT_DEVANAGARI) {
 | |
| 		/* Old-spec eye-lash Ra needs special handling.	From the spec:
 | |
| 		* "The feature 'below-base form' is applied to consonants
 | |
| 		* having below-base forms and following the base consonant.
 | |
| 		* The exception is vattu, which may appear below half forms
 | |
| 		* as well as below the base glyph. The feature 'below-base
 | |
| 		* form' will be applied to all such occurrences of Ra as well."
 | |
| 		*
 | |
| 		* Test case: U+0924,U+094D,U+0930,U+094d,U+0915
 | |
| 		* with Sanskrit 2003 font.
 | |
| 		*
 | |
| 		* However, note that Ra,Halant,ZWJ is the correct way to
 | |
| 		* request eyelash form of Ra, so we wouldn't inhibit it
 | |
| 		* in that sequence.
 | |
| 		*
 | |
| 		* Test case: U+0924,U+094D,U+0930,U+094d,U+200D,U+0915
 | |
| 		*/
 | |
| 		for ($i = $start; ($i + 1) < $base; $i++) {
 | |
| 			if ($info[$i]['indic_category'] == self::OT_Ra && $info[$i+1]['indic_category'] == self::OT_H &&
 | |
| 				($i + 2 == $base || $info[$i+2]['indic_category'] != self::OT_ZWJ)) {
 | |
| 				$info[$i]['mask'] |= self::FLAG(self::BLWF);
 | |
| 				$info[$i+1]['mask'] |= self::FLAG(self::BLWF);
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 	if ($scriptblock != UCDN::SCRIPT_KHMER) { 
 | |
| 		if (count($GSUBdata['pref']) && $base + 2 < $end) {
 | |
| 			/* Find a Halant,Ra sequence and mark it for pre-base processing. */
 | |
| 			for ($i = $base + 1; $i + 1 < $end; $i++) {
 | |
| 				// If old_spec find Ra-Halant...
 | |
| 				if ((isset($GSUBdata['pref'][$info[$i + 1]['uni']]) && self::is_halant_or_coeng($info[$i]) && self::is_ra($info[$i + 1]['uni'])  ) || 
 | |
| 				($is_old_spec && isset($GSUBdata['pref'][$info[$i]['uni']]) && self::is_halant_or_coeng($info[$i + 1]) && self::is_ra($info[$i]['uni'])  )
 | |
| 					) {
 | |
| 					$info[$i++]['mask'] |= self::FLAG(self::PREF);
 | |
| 					$info[$i++]['mask'] |= self::FLAG(self::PREF);
 | |
| 					break;
 | |
| 				}
 | |
| 			}
 | |
| 		}
 | |
| 	}
 | |
| 
 | |
| 
 | |
| 	/* Apply ZWJ/ZWNJ effects */
 | |
| 	for ($i = $start + 1; $i < $end; $i++) {
 | |
| 		if (self::is_joiner ($info[$i])) {
 | |
| 			$non_joiner = ($info[$i]['indic_category'] == self::OT_ZWNJ);
 | |
| 			$j = $i;
 | |
| 			while ($j > $start) {
 | |
| 				if (defined("OMIT_INDIC_FIX_4") && OMIT_INDIC_FIX_4 == 1) {
 | |
| 					// INDIC_FIX_4 = do nothing - carry on //
 | |
| 					// ZWNJ should block H C from forming blwf post-base - need to unmask backwards beyond first consonant arrived at //
 | |
| 					if  (!self::is_consonant($info[$j])) { break; }
 | |
| 				}
 | |
| 				$j--;
 | |
| 
 | |
| 				/* ZWJ/ZWNJ should disable CJCT.	They do that by simply
 | |
| 	 			* being there, since we don't skip them for the CJCT
 | |
| 	 			* feature (ie. F_MANUAL_ZWJ) */
 | |
| 
 | |
| 				/* A ZWNJ disables HALF. */
 | |
| 				if ($non_joiner) {
 | |
| 					$info[$j]['mask'] &= ~(self::FLAG(self::HALF) | self::FLAG(self::BLWF));
 | |
| 				}
 | |
| 
 | |
| 			} 
 | |
| 		}
 | |
| 	}
 | |
| }
 | |
| 
 | |
/*
 * Walk the whole buffer, split it into runs of entries that share the same
 * 'syllable' value, and hand each run to final_reordering_syllable() as a
 * half-open index range [start, end).
 *
 * $info is taken by reference and reordered in place; the remaining
 * arguments are forwarded unchanged to final_reordering_syllable().
 */
public static function final_reordering(&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec) {
	$total = count($info);
	if ($total === 0) {
		return;
	}

	$run_start = 0;
	$current_syllable = $info[0]['syllable'];

	for ($pos = 1; $pos < $total; $pos++) {
		if ($info[$pos]['syllable'] == $current_syllable) {
			continue;
		}
		/* Syllable boundary: flush the run that just ended. */
		self::final_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $run_start, $pos);
		$run_start = $pos;
		$current_syllable = $info[$run_start]['syllable'];
	}

	/* Flush the trailing run. */
	self::final_reordering_syllable($info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $run_start, $total);
}
 | |
| 
 | |
/*
 * Final reordering of a single syllable, performed in place on $info.
 *
 * $info         Glyph records (by reference). This method reads the keys
 *               'indic_position', 'indic_category', 'mask' and
 *               'general_category' (plus 'is_ligature' via the is_* helpers).
 * $GSUBdata     Only $GSUBdata['pref'] is consulted here: when it is empty
 *               the pre-base reordering step is skipped.
 * $indic_config Script configuration row; only index 3 (reph_pos) is read.
 * $scriptblock  Compared against UCDN::SCRIPT_MALAYALAM / UCDN::SCRIPT_TAMIL.
 * $is_old_spec  Not used by this method; kept for signature compatibility.
 * $start, $end  Half-open index range [start, end) of the syllable.
 *
 * Steps: locate the base, move pre-base matras, move the reph, move a
 * pre-base reordering consonant, and finally flag a leading left matra
 * with INIT when it starts a word.
 */
public static function final_reordering_syllable (&$info, $GSUBdata, $indic_config, $scriptblock, $is_old_spec, $start, $end) {

	/* 4. Final reordering:
	*
	* After the localized forms and basic shaping forms GSUB features have been
	* applied (see below), the shaping engine performs some final glyph
	* reordering before applying all the remaining font features to the entire
	* cluster.
	*/

	/* Find base again */
	for ($base = $start; $base < $end; $base++) {
		if ($info[$base]['indic_position'] >= self::POS_BASE_C) {
			if ($start < $base && $info[$base]['indic_position'] > self::POS_BASE_C) {
				$base--;
			}
			break;
		}
	}
	if ($base == $end && $start < $base && $info[$base - 1]['indic_category'] != self::OT_ZWJ) {
		$base--;
	}
	/* isset() guards the case where $base is still == $end at the end of the buffer. */
	while ($start < $base && isset($info[$base]) && ($info[$base]['indic_category'] == self::OT_H || $info[$base]['indic_category'] == self::OT_N)) {
		$base--;
	}


	/*	o Reorder matras:
	*
	*	If a pre-base matra character had been reordered before applying basic
	*	features, the glyph can be moved closer to the main consonant based on
	*	whether half-forms had been formed. Actual position for the matra is
	*	defined as "after last standalone halant glyph, after initial matra
	*	position and before the main consonant". If ZWJ or ZWNJ follow this
	*	halant, position is moved after it.
	*/

	if ($start + 1 < $end && $start < $base) { 	/* Otherwise there can't be any pre-base matra characters. */
		/* If we lost track of base, alas, position before last thingy. */
		$new_pos = ($base == $end) ? $base - 2 : $base - 1;

		/* Malayalam / Tamil do not have "half" forms or explicit virama forms.
		* The glyphs formed by 'half' are Chillus or ligated explicit viramas.
		* We want to position matra after them.
		*/
		if ($scriptblock != UCDN::SCRIPT_MALAYALAM && $scriptblock != UCDN::SCRIPT_TAMIL) {
			while ($new_pos > $start && !(self::is_one_of ($info[$new_pos], (self::FLAG(self::OT_M) | self::FLAG(self::OT_H) | self::FLAG(self::OT_Coeng))))) {
				$new_pos--;
			}

			/* If we found no Halant we are done.
			* Otherwise only proceed if the Halant does
			* not belong to the Matra itself! */
			if (self::is_halant_or_coeng($info[$new_pos]) && $info[$new_pos]['indic_position'] != self::POS_PRE_M) {
				/* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
				if ($new_pos + 1 < $end && self::is_joiner($info[$new_pos + 1])) {
					$new_pos++;
				}
			}
			else {
				$new_pos = $start; /* No move. */
			}
		}

		if ($start < $new_pos && $info[$new_pos]['indic_position'] != self::POS_PRE_M) {
			/* Now go see if there's actually any matras... */
			for ($i = $new_pos; $i > $start; $i--) {
				if ($info[$i - 1]['indic_position'] == self::POS_PRE_M) {
					$old_pos = $i - 1;
					/* Equivalent of the C memmove: the matra ends up at index $new_pos. */
					self::_move_info_pos($info, $old_pos, $new_pos+1);

					if ($old_pos < $base && $base <= $new_pos) { /* Shouldn't actually happen. */
						$base--;
					}
					$new_pos--;
				}
			}
		}
	}


	/*	o Reorder reph:
	*
	*	Reph's original position is always at the beginning of the syllable,
	*	(i.e. it is not reordered at the character reordering stage). However,
	*	it will be reordered according to the basic-forms shaping results.
	*	Possible positions for reph, depending on the script, are: after main,
	*	before post-base consonant forms, and after post-base consonant forms.
	*/

	/* If there's anything after the Ra that has the REPH pos, it ought to be halant.
	* Which means that the font has failed to ligate the Reph.	In which case, we
	* shouldn't move. */
	if ($start + 1 < $end &&
		$info[$start]['indic_position'] == self::POS_RA_TO_BECOME_REPH && $info[$start + 1]['indic_position'] != self::POS_RA_TO_BECOME_REPH) {
		$reph_pos = $indic_config[3];
		/* These two flags stand in for the goto labels of the C original. */
		$skip_to_reph_step_5 = false;
		$skip_to_reph_move = false;

		/*	1. If reph should be positioned after post-base consonant forms,
		*	proceed to step 5.
		*/
		if ($reph_pos == self::REPH_POS_AFTER_POST) {
			$skip_to_reph_step_5 = true;
		}

		/*	2. If the reph repositioning class is not after post-base: target
		*	position is after the first explicit halant glyph between the
		*	first post-reph consonant and last main consonant. If ZWJ or ZWNJ
		*	are following this halant, position is moved after it. If such
		*	position is found, this is the target position. Otherwise,
		*	proceed to the next step.
		*
		*	Note: in old-implementation fonts, where classifications were
		*	fixed in shaping engine, there was no case where reph position
		*	will be found on this step.
		*/
		if (!$skip_to_reph_step_5) {

			$new_reph_pos = $start + 1;

			while ($new_reph_pos < $base && !self::is_halant_or_coeng($info[$new_reph_pos])) {
				$new_reph_pos++;
			}

			if ($new_reph_pos < $base && self::is_halant_or_coeng($info[$new_reph_pos])) {
				/* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
				if ($new_reph_pos + 1 < $base && self::is_joiner ($info[$new_reph_pos + 1])) {
					$new_reph_pos++;
				}
				$skip_to_reph_move = true;
			}
		}

		/*	3. If reph should be repositioned after the main consonant: find the
		*	first consonant not ligated with main, or find the first
		*	consonant that is not a potential pre-base reordering Ra.
		*/
		if ($reph_pos == self::REPH_POS_AFTER_MAIN && !$skip_to_reph_move && !$skip_to_reph_step_5) {
			$new_reph_pos = $base;
			/* XXX Skip potential pre-base reordering Ra. */
			while ($new_reph_pos + 1 < $end && $info[$new_reph_pos + 1]['indic_position'] <= self::POS_AFTER_MAIN) {
				$new_reph_pos++;
			}
			if ($new_reph_pos < $end) {
				$skip_to_reph_move = true;
			}
		}

		/*	4. If reph should be positioned before post-base consonant, find
		*	first post-base classified consonant not ligated with main. If no
		*	consonant is found, the target position should be before the
		*	first matra, syllable modifier sign or vedic sign.
		*/
		/* This is our take on what step 4 is trying to say (and failing, BADLY). */
		if ($reph_pos == self::REPH_POS_AFTER_SUB && !$skip_to_reph_move && !$skip_to_reph_step_5) {
			$new_reph_pos = $base;
			while ($new_reph_pos < $end && isset($info[$new_reph_pos + 1]['indic_position']) &&
			!( self::FLAG($info[$new_reph_pos + 1]['indic_position']) & (self::FLAG(self::POS_POST_C) | self::FLAG(self::POS_AFTER_POST) | self::FLAG(self::POS_SMVD)))) {
				$new_reph_pos++;
			}
			if ($new_reph_pos < $end) { $skip_to_reph_move = true; }
		}

		/*	5. If no consonant is found in steps 3 or 4, move reph to a position
		*		immediately before the first post-base matra, syllable modifier
		*		sign or vedic sign that has a reordering class after the intended
		*		reph position. For example, if the reordering position for reph
		*		is post-main, it will skip above-base matras that also have a
		*		post-main position.
		*/
		if (!$skip_to_reph_move) {
			/* Copied from step 2. */
			$new_reph_pos = $start + 1;
			while ($new_reph_pos < $base && !self::is_halant_or_coeng($info[$new_reph_pos])) {
				$new_reph_pos++;
			}

			if ($new_reph_pos < $base && self::is_halant_or_coeng($info[$new_reph_pos])) {
				/* ->If ZWJ or ZWNJ are following this halant, position is moved after it. */
				if ($new_reph_pos + 1 < $base && self::is_joiner($info[$new_reph_pos + 1])) {
					$new_reph_pos++;
				}
				$skip_to_reph_move = true;
			}
		}


		/*	6. Otherwise, reorder reph to the end of the syllable.
		*/
		if (!$skip_to_reph_move) {
			$new_reph_pos = $end - 1;
			while ($new_reph_pos > $start && $info[$new_reph_pos]['indic_position'] == self::POS_SMVD) {
				$new_reph_pos--;
			}

			/*
			* If the Reph is to be ending up after a Matra,Halant sequence,
			* position it before that Halant so it can interact with the Matra.
			* However, if it's a plain Consonant,Halant we shouldn't do that.
			* Uniscribe doesn't do this.
			* TEST: U+0930,U+094D,U+0915,U+094B,U+094D
			*/
			//if (!$hb_options.uniscribe_bug_compatible && self::is_halant_or_coeng($info[$new_reph_pos])) {
			if (self::is_halant_or_coeng($info[$new_reph_pos])) {
				/* The loop bound is re-read each pass, so every decrement shortens the scan. */
				for ($i = $base + 1; $i < $new_reph_pos; $i++) {
					if ($info[$i]['indic_category'] == self::OT_M) {
						/* Ok, got it. */
						$new_reph_pos--;
					}
				}
			}
		}


		/* Move: the reph ends up at index $new_reph_pos. */
		self::_move_info_pos($info, $start, $new_reph_pos+1);

		if ($start < $base && $base <= $new_reph_pos) {
			$base--;
		}
	}


	/*	o Reorder pre-base reordering consonants:
	*
	*	If a pre-base reordering consonant is found, reorder it according to
	*	the following rules:
	*/

	/* !empty() rather than count(): a missing or null 'pref' entry means "no pref lookups". */
	if (!empty($GSUBdata['pref']) && $base + 1 < $end) { /* Otherwise there can't be any pre-base reordering Ra. */
		for ($i = $base + 1; $i < $end; $i++) {
			if ($info[$i]['mask'] & self::FLAG(self::PREF)) {
				/*	1. Only reorder a glyph produced by substitution during application
				*	of the <pref> feature. (Note that a font may shape a Ra consonant with
				*	the feature generally but block it in certain contexts.)
				*/
// ??? Need to TEST if actual substitution has occurred
				if ($i + 1 == $end || ($info[$i + 1]['mask'] & self::FLAG(self::PREF)) == 0) {
					/*
					*	2. Try to find a target position the same way as for pre-base matra.
					*	If it is found, reorder pre-base consonant glyph.
					*
					*	3. If position is not found, reorder immediately before main
					*	consonant.
					*/
					/* Remember which glyph is being moved before any further scanning. */
					$old_pos = $i;
					$new_pos = $base;
					/* Malayalam / Tamil do not have "half" forms or explicit virama forms.
					* The glyphs formed by 'half' are Chillus or ligated explicit viramas.
					* We want to position matra after them.
					*/
					if ($scriptblock != UCDN::SCRIPT_MALAYALAM && $scriptblock != UCDN::SCRIPT_TAMIL) {
						while ($new_pos > $start &&
							!(self::is_one_of($info[$new_pos - 1], self::FLAG(self::OT_M) | self::FLAG(self::OT_H) | self::FLAG(self::OT_Coeng)))) {
							$new_pos--;
						}

						/* In Khmer coeng model, a V,Ra can go *after* matras. If it goes after a
						* split matra, it should be reordered to *before* the left part of such matra. */
						if ($new_pos > $start && $info[$new_pos - 1]['indic_category'] == self::OT_M) {
							/* Separate index $k: the C original shadows i here, and reusing
							* $i would corrupt the position of the glyph to move. */
							for ($k = $base + 1; $k < $old_pos; $k++) {
								if ($info[$k]['indic_category'] == self::OT_M) {
									$new_pos--;
									break;
								}
							}
						}
					}

					if ($new_pos > $start && self::is_halant_or_coeng($info[$new_pos - 1])) {
						/* -> If ZWJ or ZWNJ follow this halant, position is moved after it. */
						if ($new_pos < $end && self::is_joiner($info[$new_pos])) {
							$new_pos++;
						}
					}

					self::_move_info_pos($info, $old_pos, $new_pos);

					if ($new_pos <= $base && $base < $old_pos) {
						$base++;
					}
				}

				break;
			}
		}
	}


	/* Apply 'init' to the Left Matra if it's a word start. */
	if ($info[$start]['indic_position'] == self::POS_PRE_M &&
		($start==0 ||
		($info[$start - 1]['general_category'] < UCDN::UNICODE_GENERAL_CATEGORY_FORMAT || $info[$start - 1]['general_category'] > UCDN::UNICODE_GENERAL_CATEGORY_NON_SPACING_MARK)
		)) {
		$info[$start]['mask'] |= self::FLAG(self::INIT);
	}


	/*
	* Finish off and go home!
	*/

}
 | |
| 
 | |
/*
 * Move one element of $info (taken by reference) to a new place, shifting
 * the elements in between. The array keeps its length and is re-indexed by
 * array_splice().
 *
 * $from  Index of the element to move.
 * $to    Insertion point, expressed in the indices of the array BEFORE the move:
 *        - $from >  $to: the element ends up at index $to;
 *        - $from <= $to: the element ends up at index $to - 1
 *          ($from == $to leaves the array unchanged).
 *
 * Declared static because every caller invokes it as self::_move_info_pos()
 * from a static method; calling a non-static method that way is an Error
 * in PHP 8.
 */
public static function _move_info_pos(&$info, $from, $to) {
	/* Wrap in an array so array_splice() inserts the record as ONE element
	* instead of spreading its keys. */
	$t = array($info[$from]);
	if ($from > $to) {
		/* Moving backwards: remove first so $to is not shifted by the insert. */
		array_splice($info, $from, 1);
		array_splice($info, $to, 0, $t);
	}
	else {
		/* Moving forwards: insert first so $from still addresses the original. */
		array_splice($info, $to, 0, $t);
		array_splice($info, $from, 1);
	}
}
 | |
| 
 | |
| 
 | |
/* Code points treated as the consonant Ra, one per script; used as a lookup set (key => 1). */
public static $ra_chars = array(
	/* Devanagari */
	0x0930 => 1,
	/* Bengali */
	0x09B0 => 1,
	/* Bengali (Assamese) */
	0x09F0 => 1,
	/* Gurmukhi - No Reph */
	0x0A30 => 1,
	/* Gujarati */
	0x0AB0 => 1,
	/* Oriya */
	0x0B30 => 1,
	/* Tamil - No Reph */
	0x0BB0 => 1,
	/* Telugu - Reph formed only with ZWJ */
	0x0C30 => 1,
	/* Kannada */
	0x0CB0 => 1,
	/* Malayalam - No Reph, Logical Repha */
	0x0D30 => 1,
	/* Sinhala - Reph formed only with ZWJ */
	0x0DBB => 1,
	/* Khmer - No Reph, Visual Repha */
	0x179A => 1,
);

/* True when code point $u is one of the Ra characters listed in self::$ra_chars. */
public static function is_ra ($u) {
	return isset(self::$ra_chars[$u]);
}
 | |
| 
 | |
/*
 * Test whether the record's 'indic_category' bit is present in the bitmask $flags.
 * A record marked as a ligature ('is_ligature' set and truthy) never matches.
 */
public static function is_one_of ($info, $flags) {
	/* If it ligated, all bets are off. */
	if (!empty($info['is_ligature'])) {
		return false;
	}
	$category_bit = self::FLAG($info['indic_category']);
	return (bool) ($category_bit & $flags);
}
 | |
| 
 | |
/* True when the (non-ligated) record is categorised as ZWJ or ZWNJ. */
public static function is_joiner($info) {
	$joiner_flags = self::FLAG(self::OT_ZWJ) | self::FLAG(self::OT_ZWNJ);
	return self::is_one_of($info, $joiner_flags);
}
 | |
| 
 | |
| 
 | |
/*
 * True when the (non-ligated) record counts as a consonant.
 * Vowels and placeholders (NBSP, dotted circle) are treated as if they were consonants.
 */
public static function is_consonant($info) {
	$consonant_flags = self::FLAG(self::OT_C)
		| self::FLAG(self::OT_CM)
		| self::FLAG(self::OT_Ra)
		| self::FLAG(self::OT_V)
		| self::FLAG(self::OT_NBSP)
		| self::FLAG(self::OT_DOTTEDCIRCLE);
	return self::is_one_of($info, $consonant_flags);
}
 | |
| 
 | |
| 
 | |
/* True when the (non-ligated) record is categorised as Halant (OT_H) or Coeng. */
public static function is_halant_or_coeng($info) {
	$halant_flags = self::FLAG(self::OT_H) | self::FLAG(self::OT_Coeng);
	return self::is_one_of($info, $halant_flags);
}
 | |
| 
 | |
| 
 | |
| 
 | |
// From hb-private.hh
/*
 * Inclusive range test: is $lo <= $u <= $hi ?
 * When [$lo, $hi] is a power-of-two sized, aligned block the answer is
 * computed with a single mask-and-compare; otherwise by two comparisons.
 */
public static function in_range ($u, $lo, $hi) {
	$span = $lo ^ $hi;
	$is_aligned_block = (($span & $lo) == 0)
		&& (($span & $hi) == $span)
		&& (($span & ($span + 1)) == 0);

	if ($is_aligned_block) {
		return ($u & ~$span) == $lo;
	}
	return $lo <= $u && $u <= $hi;
}
 | |
// From hb-private.hh
/* Return an integer with only bit number $x set, for building and testing bitmasks. */
public static function FLAG($x) {
	return 1 << $x;
}
 | |
| 
 | |
| 
 | |
| // BELOW from hb-ot-shape-complex-indic.cc
 | |
| 
 | |
| /*
 | |
|  * Indic configurations.	
 | |
|  */
 | |
| 
 | |
// base_position
// Values stored at index 2 (base_pos) of each self::$indic_configs row.
const BASE_POS_FIRST = 0;
const BASE_POS_LAST = 1;

// reph_position
// Values stored at index 3 (reph_pos) of each self::$indic_configs row and
// compared against in final_reordering_syllable(). The trailing comments
// name the POS_* position each value corresponds to.
// Note that REPH_POS_DEFAULT and REPH_POS_BEFORE_POST share the value 10.
const REPH_POS_DEFAULT 		= 10;	// POS_BEFORE_POST,

const REPH_POS_AFTER_MAIN	= 5;	// POS_AFTER_MAIN,
const REPH_POS_BEFORE_SUB	= 7;	// POS_BEFORE_SUB,
const REPH_POS_AFTER_SUB	= 9;	// POS_AFTER_SUB,
const REPH_POS_BEFORE_POST 	= 10;	// POS_BEFORE_POST,
const REPH_POS_AFTER_POST	= 12;	// POS_AFTER_POST

// reph_mode
// Values stored at index 4 (reph_mode) of each self::$indic_configs row.
const REPH_MODE_IMPLICIT = 0;		/* Reph formed out of initial Ra,H sequence. */
const REPH_MODE_EXPLICIT = 1;		/* Reph formed out of initial Ra,H,ZWJ sequence. */
const REPH_MODE_VIS_REPHA = 2;	/* Encoded Repha character, no reordering needed. */
const REPH_MODE_LOG_REPHA = 3;	/* Encoded Repha character, needs reordering. */
 | |
| 
 | |
| 
 | |
| 
 | |
/*
struct of indic_configs{
	KEY - script;
	0 - has_old_spec;
	1 - virama;
	2 - base_pos;
	3 - reph_pos;
	4 - reph_mode;
};

Per-script shaping configuration.
Index 1 is the virama code point; indices 2, 3 and 4 hold BASE_POS_*,
REPH_POS_* and REPH_MODE_* values respectively.
Rows without a trailing comment are annotated by the Unicode block their
virama code point falls in - NOTE(review): confirm the numeric keys against
the UCDN script numbering.
*/

public static $indic_configs = array(	/* index is SCRIPT_number from UCDN */
	9 => array(true, 0x094D, 1, 10, 0),	/* virama in Devanagari block */
	10 => array(true, 0x09CD, 1, 9, 0),	/* virama in Bengali block */
	11 => array(true, 0x0A4D, 1, 7, 0),	/* virama in Gurmukhi block */
	12 => array(true, 0x0ACD, 1, 10, 0),	/* virama in Gujarati block */
	13 => array(true, 0x0B4D, 1, 5, 0),	/* virama in Oriya block */
	14 => array(true, 0x0BCD, 1, 12, 0),	/* virama in Tamil block */
	15 => array(true, 0x0C4D, 1, 12, 1),	/* virama in Telugu block */
	16 => array(true, 0x0CCD, 1, 12, 0),	/* virama in Kannada block */
	17 => array(true, 0x0D4D, 1, 5, 3),	/* virama in Malayalam block */
	18 => array(false, 0x0DCA, 0, 5, 1),	/* Sinhala */
	30 => array(false, 0x17D2, 0, 10, 2),	/* Khmer */
	84 => array(false, 0xA9C0, 1, 10, 0),	/* Javanese */

);
 | |
| 
 | |
| 
 | |
| 
 | |
| /*
 | |
| 
 | |
| // from "hb-ot-shape-complex-indic-table.cc"
 | |
| 
 | |
| 
 | |
| const ISC_A	 = 0; //	INDIC_SYLLABIC_CATEGORY_AVAGRAHA		Avagraha
 | |
| const ISC_Bi = 8; //	INDIC_SYLLABIC_CATEGORY_BINDU			Bindu
 | |
| const ISC_C	 = 1; //	INDIC_SYLLABIC_CATEGORY_CONSONANT		Consonant
 | |
| const ISC_CD = 1; //	INDIC_SYLLABIC_CATEGORY_CONSONANT_DEAD		Consonant_Dead
 | |
| const ISC_CF = 17; //	INDIC_SYLLABIC_CATEGORY_CONSONANT_FINAL		Consonant_Final
 | |
| const ISC_CHL = 1; //	INDIC_SYLLABIC_CATEGORY_CONSONANT_HEAD_LETTER	Consonant_Head_Letter
 | |
| const ISC_CM = 17; //	INDIC_SYLLABIC_CATEGORY_CONSONANT_MEDIAL		Consonant_Medial
 | |
| const ISC_CP = 11; //	INDIC_SYLLABIC_CATEGORY_CONSONANT_PLACEHOLDER	Consonant_Placeholder
 | |
| const ISC_CR = 15; //	INDIC_SYLLABIC_CATEGORY_CONSONANT_REPHA		Consonant_Repha
 | |
| const ISC_CS = 1; //	INDIC_SYLLABIC_CATEGORY_CONSONANT_SUBJOINED	Consonant_Subjoined
 | |
| const ISC_ML = 0; //	INDIC_SYLLABIC_CATEGORY_MODIFYING_LETTER	Modifying_Letter
 | |
| const ISC_N	 = 3; //	INDIC_SYLLABIC_CATEGORY_NUKTA			Nukta
 | |
| const ISC_x	 = 0; //	INDIC_SYLLABIC_CATEGORY_OTHER			Other
 | |
| const ISC_RS = 13; //	INDIC_SYLLABIC_CATEGORY_REGISTER_SHIFTER	Register_Shifter
 | |
| const ISC_TL = 0; //	INDIC_SYLLABIC_CATEGORY_TONE_LETTER		Tone_Letter
 | |
| const ISC_TM = 3; //	INDIC_SYLLABIC_CATEGORY_TONE_MARK		Tone_Mark
 | |
| const ISC_V	 = 4; //	INDIC_SYLLABIC_CATEGORY_VIRAMA		Virama
 | |
| const ISC_Vs = 8; //	INDIC_SYLLABIC_CATEGORY_VISARGA		Visarga
 | |
| const ISC_Vo = 2; //	INDIC_SYLLABIC_CATEGORY_VOWEL			Vowel
 | |
| const ISC_M	 = 7; //	INDIC_SYLLABIC_CATEGORY_VOWEL_DEPENDENT	Vowel_Dependent
 | |
| const ISC_VI = 2; //	INDIC_SYLLABIC_CATEGORY_VOWEL_INDEPENDENT	Vowel_Independent
 | |
| 
 | |
| const IMC_B	 = 8; //	INDIC_MATRA_CATEGORY_BOTTOM			Bottom
 | |
| const IMC_BR = 11; //	INDIC_MATRA_CATEGORY_BOTTOM_AND_RIGHT	Bottom_And_Right
 | |
| const IMC_I	 = 15; //	INDIC_MATRA_CATEGORY_INVISIBLE		Invisible
 | |
| const IMC_L	 = 3; //	INDIC_MATRA_CATEGORY_LEFT			Left
 | |
| const IMC_LR = 11; //	INDIC_MATRA_CATEGORY_LEFT_AND_RIGHT		Left_And_Right
 | |
| const IMC_x	 = 15; //	INDIC_MATRA_CATEGORY_NOT_APPLICABLE		Not_Applicable
 | |
| const IMC_O	 = 5; //	INDIC_MATRA_CATEGORY_OVERSTRUCK		Overstruck
 | |
| const IMC_R	 = 11; //	INDIC_MATRA_CATEGORY_RIGHT			Right
 | |
| const IMC_T	 = 6; //	INDIC_MATRA_CATEGORY_TOP			Top
 | |
| const IMC_TB = 8; //	INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM		Top_And_Bottom
 | |
| const IMC_TBR = 11; //	INDIC_MATRA_CATEGORY_TOP_AND_BOTTOM_AND_RIGHT	Top_And_Bottom_And_Right
 | |
| const IMC_TL = 6; //	INDIC_MATRA_CATEGORY_TOP_AND_LEFT		Top_And_Left
 | |
| const IMC_TLR = 11; //	INDIC_MATRA_CATEGORY_TOP_AND_LEFT_AND_RIGHT	Top_And_Left_And_Right
 | |
| const IMC_TR = 11; //	INDIC_MATRA_CATEGORY_TOP_AND_RIGHT		Top_And_Right
 | |
| const IMC_VOL = 2; //	INDIC_MATRA_CATEGORY_VISUAL_ORDER_LEFT		Visual_Order_Left
 | |
| 
 | |
| If in original table = _(C,x), that = ISC_C,IMC_x
 | |
| Value is IMC_x << 8 (or IMC_x * 256) = 3840
 | |
| plus ISC_C = 1, so = 3841
 | |
| 
 | |
|  */
 | |
| 
 | |
| 
 | |
| 
 | |
| public static $indic_table = array(
 | |
| 
 | |
|   /* Devanagari  (0900..097F) */
 | |
| 
 | |
|   /* 0900 */ 3848,3848,3848,3848,3842,3842,3842,3842,
 | |
|   /* 0908 */ 3842,3842,3842,3842,3842,3842,3842,3842,
 | |
|   /* 0910 */ 3842,3842,3842,3842,3842, 3841, 3841, 3841,
 | |
|   /* 0918 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0920 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0928 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0930 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0938 */  3841, 3841, 1543, 2823, 3843, 3840, 2823, 775,
 | |
|   /* 0940 */  2823, 2055, 2055, 2055, 2055, 1543, 1543, 1543,
 | |
|   /* 0948 */  1543, 2823, 2823, 2823, 2823, 2052, 775, 2823,
 | |
|   /* 0950 */  3840, 3840, 3840, 3840, 3840, 1543, 2055, 2055,
 | |
|   /* 0958 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0960 */ 3842,3842, 2055, 2055, 3840, 3840, 3840, 3840,
 | |
|   /* 0968 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0970 */  3840, 3840,3842,3842,3842,3842,3842,3842,
 | |
|   /* 0978 */  3840, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
| 
 | |
|   /* Bengali  (0980..09FF) */
 | |
| 
 | |
|   /* 0980 */  3840,3848,3848,3848, 3840,3842,3842,3842,
 | |
|   /* 0988 */ 3842,3842,3842,3842,3842, 3840, 3840,3842,
 | |
|   /* 0990 */ 3842, 3840, 3840,3842,3842, 3841, 3841, 3841,
 | |
|   /* 0998 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 09A0 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 09A8 */  3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 09B0 */  3841, 3840, 3841, 3840, 3840, 3840, 3841, 3841,
 | |
|   /* 09B8 */  3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
 | |
|   /* 09C0 */  2823, 2055, 2055, 2055, 2055, 3840, 3840, 775,
 | |
|   /* 09C8 */  775, 3840, 3840,2823,2823, 2052,3841, 3840,
 | |
|   /* 09D0 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
 | |
|   /* 09D8 */  3840, 3840, 3840, 3840, 3841, 3841, 3840, 3841,
 | |
|   /* 09E0 */ 3842,3842, 2055, 2055, 3840, 3840, 3840, 3840,
 | |
|   /* 09E8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 09F0 */  3841, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 09F8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
| 
 | |
|   /* Gurmukhi  (0A00..0A7F) */
 | |
| 
 | |
|   /* 0A00 */  3840,3848,3848,3848, 3840,3842,3842,3842,
 | |
|   /* 0A08 */ 3842,3842,3842, 3840, 3840, 3840, 3840,3842,
 | |
|   /* 0A10 */ 3842, 3840, 3840,3842,3842, 3841, 3841, 3841,
 | |
|   /* 0A18 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0A20 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0A28 */  3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0A30 */  3841, 3840, 3841, 3841, 3840, 3841, 3841, 3840,
 | |
|   /* 0A38 */  3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
 | |
|   /* 0A40 */  2823, 2055, 2055, 3840, 3840, 3840, 3840, 1543,
 | |
|   /* 0A48 */  1543, 3840, 3840, 1543, 1543, 2052, 3840, 3840,
 | |
|   /* 0A50 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0A58 */  3840, 3841, 3841, 3841, 3841, 3840, 3841, 3840,
 | |
|   /* 0A60 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0A68 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0A70 */ 3848, 3840,13841,13841, 3840, 3857, 3840, 3840,
 | |
|   /* 0A78 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
| 
 | |
|   /* Gujarati  (0A80..0AFF) */
 | |
| 
 | |
|   /* 0A80 */  3840,3848,3848,3848, 3840,3842,3842,3842,
 | |
|   /* 0A88 */ 3842,3842,3842,3842,3842,3842, 3840,3842,
 | |
|   /* 0A90 */ 3842,3842, 3840,3842,3842, 3841, 3841, 3841,
 | |
|   /* 0A98 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0AA0 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0AA8 */  3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0AB0 */  3841, 3840, 3841, 3841, 3840, 3841, 3841, 3841,
 | |
|   /* 0AB8 */  3841, 3841, 3840, 3840, 3843, 3840, 2823, 775,
 | |
|   /* 0AC0 */  2823, 2055, 2055, 2055, 2055, 1543, 3840, 1543,
 | |
|   /* 0AC8 */  1543,2823, 3840, 2823, 2823, 2052, 3840, 3840,
 | |
|   /* 0AD0 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0AD8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0AE0 */ 3842,3842, 2055, 2055, 3840, 3840, 3840, 3840,
 | |
|   /* 0AE8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0AF0 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0AF8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
| 
 | |
|   /* Oriya  (0B00..0B7F) */
 | |
| 
 | |
|   /* 0B00 */  3840,3848,3848,3848, 3840,3842,3842,3842,
 | |
|   /* 0B08 */  3842,3842,3842,3842,3842, 3840, 3840,3842,
 | |
|   /* 0B10 */  3842, 3840, 3840,3842,3842, 3841, 3841, 3841,
 | |
|   /* 0B18 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0B20 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0B28 */  3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0B30 */  3841, 3840, 3841, 3841, 3840, 3841, 3841, 3841,
 | |
|   /* 0B38 */  3841, 3841, 3840, 3840, 3843, 3840, 2823, 1543,
 | |
|   /* 0B40 */  2823, 2055, 2055, 2055, 2055, 3840, 3840, 775,
 | |
|   /* 0B48 */  1543, 3840, 3840,2823,2823,2052, 3840, 3840,
 | |
|   /* 0B50 */  3840, 3840, 3840, 3840, 3840, 3840, 1543,2823,
 | |
|   /* 0B58 */  3840, 3840, 3840, 3840, 3841, 3841, 3840, 3841,
 | |
|   /* 0B60 */  3842,3842, 2055, 2055, 3840, 3840, 3840, 3840,
 | |
|   /* 0B68 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0B70 */  3840, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0B78 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
| 
 | |
|   /* Tamil  (0B80..0BFF) */
 | |
| 
 | |
|   /* 0B80 */  3840, 3840, 3848, 3840, 3840, 3842, 3842, 3842,
 | |
|   /* 0B88 */  3842, 3842, 3842, 3840, 3840, 3840, 3842,3842,
 | |
|   /* 0B90 */  3842, 3840, 3842, 3842, 3842, 3841, 3840, 3840,
 | |
|   /* 0B98 */  3840, 3841, 3841, 3840, 3841, 3840, 3841, 3841,
 | |
|   /* 0BA0 */  3840, 3840, 3840, 3841, 3841, 3840, 3840, 3840,
 | |
|   /* 0BA8 */  3841, 3841, 3841, 3840, 3840, 3840, 3841, 3841,
 | |
|   /* 0BB0 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0BB8 */  3841, 3841, 3840, 3840, 3840, 3840, 2823, 2823,
 | |
|   /* 0BC0 */  1543, 2055, 2055, 3840, 3840, 3840, 775, 775,
 | |
|   /* 0BC8 */  775, 3840, 2823, 2823, 2823, 1540, 3840, 3840,
 | |
|   /* 0BD0 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
 | |
|   /* 0BD8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0BE0 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0BE8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0BF0 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0BF8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
| 
 | |
|   /* Telugu  (0C00..0C7F) */
 | |
| 
 | |
|   /* 0C00 */  3840,3848,3848,3848, 3840,3842,3842,3842,
 | |
|   /* 0C08 */ 3842,3842,3842,3842,3842, 3840,3842,3842,
 | |
|   /* 0C10 */ 3842, 3840,3842,3842,3842, 3841, 3841, 3841,
 | |
|   /* 0C18 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0C20 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0C28 */  3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0C30 */  3841, 3841, 3841, 3841, 3840, 3841, 3841, 3841,
 | |
|   /* 0C38 */  3841, 3841, 3840, 3840, 3840, 3840, 1543, 1543,
 | |
|   /* 0C40 */  1543, 2823, 2823, 2823, 2823, 3840, 1543, 1543,
 | |
|   /* 0C48 */ 2055, 3840, 1543, 1543, 1543, 1540, 3840, 3840,
 | |
|   /* 0C50 */  3840, 3840, 3840, 3840, 3840, 1543, 2055, 3840,
 | |
|   /* 0C58 */  3841, 3841, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0C60 */ 3842,3842, 2055, 2055, 3840, 3840, 3840, 3840,
 | |
|   /* 0C68 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0C70 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0C78 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
| 
 | |
|   /* Kannada  (0C80..0CFF) */
 | |
| 
 | |
|   /* 0C80 */  3840, 3840,3848,3848, 3840,3842,3842,3842,
 | |
|   /* 0C88 */ 3842,3842,3842,3842,3842, 3840,3842,3842,
 | |
|   /* 0C90 */ 3842, 3840,3842,3842,3842, 3841, 3841, 3841,
 | |
|   /* 0C98 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0CA0 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0CA8 */  3841, 3840, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0CB0 */  3841, 3841, 3841, 3841, 3840, 3841, 3841, 3841,
 | |
|   /* 0CB8 */  3841, 3841, 3840, 3840, 3843, 3840, 2823, 1543,
 | |
|   /* 0CC0 */ 2823, 2823, 2823, 2823, 2823, 3840, 1543,2823,
 | |
|   /* 0CC8 */ 2823, 3840,2823,2823, 1543, 1540, 3840, 3840,
 | |
|   /* 0CD0 */  3840, 3840, 3840, 3840, 3840, 2823, 2823, 3840,
 | |
|   /* 0CD8 */  3840, 3840, 3840, 3840, 3840, 3840, 3841, 3840,
 | |
|   /* 0CE0 */ 3842,3842, 2055, 2055, 3840, 3840, 3840, 3840,
 | |
|   /* 0CE8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0CF0 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0CF8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
| 
 | |
|   /* Malayalam  (0D00..0D7F) */
 | |
| 
 | |
|   /* 0D00 */  3840, 3840,3848,3848, 3840,3842,3842,3842,
 | |
|   /* 0D08 */ 3842,3842,3842,3842,3842, 3840,3842,3842,
 | |
|   /* 0D10 */ 3842, 3840,3842,3842,3842, 3841, 3841, 3841,
 | |
|   /* 0D18 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0D20 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0D28 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0D30 */  3841, 3841, 3841, 3841, 3841, 3841, 3841, 3841,
 | |
|   /* 0D38 */  3841, 3841, 3841, 3840, 3840, 3840, 2823, 2823,
 | |
|   /* 0D40 */  2823, 2823, 2823, 2055, 2055, 3840, 775, 775,
 | |
|   /* 0D48 */  775, 3840,2823,2823,2823, 1540, 3855, 3840,
 | |
|   /* 0D50 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 2823,
 | |
|   /* 0D58 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0D60 */ 3842,3842, 2055, 2055, 3840, 3840, 3840, 3840,
 | |
|   /* 0D68 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0D70 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 0D78 */  3840, 3840,3841,3841,3841,3841,3841,3841,
 | |
| 
 | |
|   /* Sinhala  (0D80..0DFF) */
 | |
| 
 | |
|   /* 0D80 */  3840,  3840, 3848, 3848,  3840, 3842, 3842, 3842,
 | |
|   /* 0D88 */  3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
 | |
|   /* 0D90 */  3842, 3842, 3842, 3842, 3842, 3842, 3842,  3840,
 | |
|   /* 0D98 */  3840,  3840,  3841,  3841,  3841,  3841,  3841,  3841,
 | |
|   /* 0DA0 */  3841,  3841,  3841,  3841,  3841,  3841,  3841,  3841,
 | |
|   /* 0DA8 */  3841,  3841,  3841,  3841,  3841,  3841,  3841,  3841,
 | |
|   /* 0DB0 */  3841,  3841,  3840,  3841,  3841,  3841,  3841,  3841,
 | |
|   /* 0DB8 */  3841,  3841,  3841,  3841,  3840,  3841,  3840,  3840,
 | |
|   /* 0DC0 */  3841,  3841,  3841,  3841,  3841,  3841,  3841,  3840,
 | |
|   /* 0DC8 */  3840,  3840,  1540,  3840,  3840,  3840,  3840,  2823,
 | |
|   /* 0DD0 */  2823,  2823,  1543,  1543,  2055,  3840,  2055,  3840,
 | |
|   /* 0DD8 */  2823,  775, 1543,  775, 2823, 2823, 2823,  2823,
 | |
|   /* 0DE0 */  3840,  3840,  3840,  3840,  3840,  3840,  3840,  3840,
 | |
|   /* 0DE8 */  3840,  3840,  3840,  3840,  3840,  3840,  3840,  3840,
 | |
|   /* 0DF0 */  3840,  3840,  2823,  2823,  3840,  3840,  3840,  3840,
 | |
|   /* 0DF8 */  3840,  3840,  3840,  3840,  3840,  3840,  3840,  3840,
 | |
| 
 | |
| 
 | |
|   /* Vedic Extensions  (1CD0..1CFF) */
 | |
| 
 | |
|   /* 1CD0 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 1CD8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 1CE0 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 1CE8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
|   /* 1CF0 */  3840, 3840,3848,3848, 3840, 3840, 3840, 3840,
 | |
|   /* 1CF8 */  3840, 3840, 3840, 3840, 3840, 3840, 3840, 3840,
 | |
| 
 | |
| 
 | |
| );
 | |
| 
 | |
| /*
 | |
|  * Lookup table for the Khmer block, one entry per code point U+1780..U+17FF.
 | |
|  * indic_get_categories() reads it as self::$khmer_table[$u - 0x1780].
 | |
|  * Each entry is a packed integer of the form (category | (position << 8)), the same
 | |
|  * encoding spelled out in the comments of indic_get_categories(); e.g. 3841 = 0x0F01.
 | |
|  * NOTE(review): the low byte presumably corresponds to the OT_* category constants
 | |
|  * declared at the top of this class - confirm before relying on it.
 | |
|  */
 | |
| public static $khmer_table = array(
 | |
| 
 | |
|   /* Khmer  (1780..17FF) */
 | |
| 
 | |
|   /* 1780 */  3841,  3841,  3841,  3841,  3841,  3841,  3841,  3841,
 | |
|   /* 1788 */  3841,  3841,  3841,  3841,  3841,  3841,  3841,  3841,
 | |
|   /* 1790 */  3841,  3841,  3841,  3841,  3841,  3841,  3841,  3841,
 | |
|   /* 1798 */  3841,  3841,  3841,  3841,  3841,  3841,  3841,  3841,
 | |
|   /* 17A0 */  3841,  3841,  3841, 3842, 3842, 3842, 3842, 3842,
 | |
|   /* 17A8 */  3842, 3842, 3842, 3842, 3842, 3842, 3842, 3842,
 | |
|   /* 17B0 */  3842, 3842, 3842, 3842,  3840,  3840,  2823,  1543,
 | |
|   /* 17B8 */  1543,  1543,  1543,  2055,  2055,  2055, 1543,2823,
 | |
|   /* 17C0 */  2823,  775,  775,  775, 2823, 2823, 3848, 3848,
 | |
|   /* 17C8 */  2823, 3853, 3853,  3840, 3855,  3840,  3840,  3840,
 | |
|   /* 17D0 */  3840,  1540,  3844,  3840,  3840,  3840,  3840,  3840,
 | |
|   /* 17D8 */  3840,  3840,  3840,  3840,  3840,  3840,  3840,  3840,
 | |
|   /* 17E0 */  3840,  3840,  3840,  3840,  3840,  3840,  3840,  3840,
 | |
|   /* 17E8 */  3840,  3840,  3840,  3840,  3840,  3840,  3840,  3840,
 | |
|   /* 17F0 */  3840,  3840,  3840,  3840,  3840,  3840,  3840,  3840,
 | |
|   /* 17F8 */  3840,  3840,  3840,  3840,  3840,  3840,  3840,  3840,
 | |
| 
 | |
| 
 | |
| );
 | |
| 
 | |
| 
 | |
| 
 | |
| // from "hb-ot-shape-complex-indic-table.cc"
 | |
/**
 * Return the packed category/position value for a Unicode code point.
 *
 * The result has the form (category | (position << 8)). Code points outside
 * the tabulated ranges yield 3840, except U+00A0 and U+25CC which yield 3851.
 *
 * @param int $u Unicode code point
 * @return int packed value taken from $indic_table / $khmer_table, or a fixed default
 */
public static function indic_get_categories ($u) {
	// Main Indic blocks: one table entry per code point, starting at index 0.
	if (0x0900 <= $u && $u <= 0x0DFF) {
		return self::$indic_table[$u - 0x0900];
	}

	// Vedic Extensions rows follow directly after the Sinhala rows in $indic_table.
	// The range above occupies indices 0..1279 (0x0DFF - 0x0900 + 1 = 1280), so the
	// Vedic rows start at index 1280. The former offset of 1152 pointed at the Sinhala
	// entries instead. The table holds 48 Vedic entries (U+1CD0..U+1CFF), so U+1D00
	// is not tabulated and falls through to the default below.
	if (0x1CD0 <= $u && $u <= 0x1CFF) {
		return self::$indic_table[$u - 0x1CD0 + 1280];
	}

	// Khmer has its own table.
	if (0x1780 <= $u && $u <= 0x17FF) {
		return self::$khmer_table[$u - 0x1780];
	}

	// NO-BREAK SPACE and DOTTED CIRCLE share one value: (ISC_CP | (IMC_x << 8))
	if ($u == 0x00A0 || $u == 0x25CC) {
		return 3851;
	}

	return 3840;	// (ISC_x | (IMC_x << 8))
}
 | |
| 
 | |
| // BELOW from hb-ot-shape-complex-indic.cc
 | |
| /*
 | |
|  * Indic shaper.
 | |
|  */
 | |
| 
 | |
/**
 * Tell whether a code point lies in the 128-code-point block that starts at $Base.
 *
 * @param int $u    Unicode code point
 * @param int $Base first code point of the block (a multiple of 0x80)
 * @return bool
 */
public static function IN_HALF_BLOCK($u, $Base) {
	// Clearing the low seven bits leaves the start of the block containing $u.
	$blockStart = $u & ~0x7F;
	return ($blockStart == $Base);
}
 | |
| 
 | |
/*
 * Script tests: each returns true when $u lies in the 128-code-point block
 * that starts at the given base code point.
 */

/** Devanagari (0900..097F) */
public static function IS_DEVA($u) {
	return self::IN_HALF_BLOCK($u, 0x0900);
}

/** Bengali (0980..09FF) */
public static function IS_BENG($u) {
	return self::IN_HALF_BLOCK($u, 0x0980);
}

/** Gurmukhi (0A00..0A7F) */
public static function IS_GURU($u) {
	return self::IN_HALF_BLOCK($u, 0x0A00);
}

/** Gujarati (0A80..0AFF) */
public static function IS_GUJR($u) {
	return self::IN_HALF_BLOCK($u, 0x0A80);
}

/** Oriya (0B00..0B7F) */
public static function IS_ORYA($u) {
	return self::IN_HALF_BLOCK($u, 0x0B00);
}

/** Tamil (0B80..0BFF) */
public static function IS_TAML($u) {
	return self::IN_HALF_BLOCK($u, 0x0B80);
}

/** Telugu (0C00..0C7F) */
public static function IS_TELU($u) {
	return self::IN_HALF_BLOCK($u, 0x0C00);
}

/** Kannada (0C80..0CFF) */
public static function IS_KNDA($u) {
	return self::IN_HALF_BLOCK($u, 0x0C80);
}

/** Malayalam (0D00..0D7F) */
public static function IS_MLYM($u) {
	return self::IN_HALF_BLOCK($u, 0x0D00);
}

/** Sinhala (0D80..0DFF) */
public static function IS_SINH($u) {
	return self::IN_HALF_BLOCK($u, 0x0D80);
}

/** Khmer (1780..17FF) */
public static function IS_KHMR($u) {
	return self::IN_HALF_BLOCK($u, 0x1780);
}
 | |
| 
 | |
| 
 | |
/**
 * Position assigned to a left-side matra; the result does not depend on the script.
 *
 * @param int $u Unicode code point (unused; kept for a signature parallel to the other MATRA_POS_* helpers)
 * @return int always self::POS_PRE_M
 */
public static function MATRA_POS_LEFT($u) {
	return self::POS_PRE_M;
}
 | |
/**
 * Position assigned to a right-side matra, chosen by the script block of $u.
 *
 * @param int $u Unicode code point of the matra
 * @return int one of the POS_* constants
 */
public static function MATRA_POS_RIGHT($u) {
	// Start of the 128-code-point block containing $u identifies the script.
	switch ($u & ~0x7F) {
		case 0x0980:	// Bengali
		case 0x0A00:	// Gurmukhi
		case 0x0A80:	// Gujarati
		case 0x0B00:	// Oriya
		case 0x0B80:	// Tamil
		case 0x0D00:	// Malayalam
		case 0x1780:	// Khmer
			return self::POS_AFTER_POST;

		case 0x0C00:	// Telugu: matras up to U+0C42 go before the subjoined forms
			if ($u <= 0x0C42) {
				return self::POS_BEFORE_SUB;
			}
			return self::POS_AFTER_SUB;

		case 0x0C80:	// Kannada: only U+0CC3..U+0CD6 go after the subjoined forms
			if ($u < 0x0CC3 || $u > 0xCD6) {
				return self::POS_BEFORE_SUB;
			}
			return self::POS_AFTER_SUB;
	}
	// Devanagari, Sinhala and everything else
	return self::POS_AFTER_SUB;	/*default*/
}
 | |
/**
 * Position assigned to a top matra, chosen by the script block of $u.
 * BENG and MLYM don't have top matras, so they take the default.
 *
 * @param int $u Unicode code point of the matra
 * @return int one of the POS_* constants
 */
public static function MATRA_POS_TOP($u) {
	// Start of the 128-code-point block containing $u identifies the script.
	switch ($u & ~0x7F) {
		case 0x0A00:	// Gurmukhi - Deviate from spec
		case 0x1780:	// Khmer
			return self::POS_AFTER_POST;

		case 0x0B00:	// Oriya
			return self::POS_AFTER_MAIN;

		case 0x0C00:	// Telugu
		case 0x0C80:	// Kannada
			return self::POS_BEFORE_SUB;
	}
	// Devanagari, Gujarati, Tamil, Sinhala and everything else
	return self::POS_AFTER_SUB;	/*default*/
}
 | |
/**
 * Position assigned to a bottom matra, chosen by the script block of $u.
 *
 * @param int $u Unicode code point of the matra
 * @return int one of the POS_* constants
 */
public static function MATRA_POS_BOTTOM($u) {
	// Start of the 128-code-point block containing $u identifies the script.
	switch ($u & ~0x7F) {
		case 0x0A00:	// Gurmukhi
		case 0x0A80:	// Gujarati
		case 0x0B80:	// Tamil
		case 0x0D00:	// Malayalam
		case 0x1780:	// Khmer
			return self::POS_AFTER_POST;

		case 0x0C00:	// Telugu
		case 0x0C80:	// Kannada
			return self::POS_BEFORE_SUB;
	}
	// Devanagari, Bengali, Oriya, Sinhala and everything else
	return self::POS_AFTER_SUB;	/*default*/
}
 | |
| 
 | |
/**
 * Map the side on which a matra attaches to its position for the script of $u.
 *
 * @param int $u    Unicode code point of the matra
 * @param int $side POS_PRE_C, POS_POST_C, POS_ABOVE_C or POS_BELOW_C
 * @return int the script-specific position, or $side unchanged for any other value
 */
public static function matra_position ($u, $side) {
	if ($side == self::POS_PRE_C) {
		return self::MATRA_POS_LEFT($u);
	}
	if ($side == self::POS_POST_C) {
		return self::MATRA_POS_RIGHT($u);
	}
	if ($side == self::POS_ABOVE_C) {
		return self::MATRA_POS_TOP($u);
	}
	if ($side == self::POS_BELOW_C) {
		return self::MATRA_POS_BOTTOM($u);
	}
	// Not one of the four sides: hand the value back untouched.
	return $side;
}
 | |
| 
 | |
| // vowel matras that have to be split into two parts.
 | |
| // From Harfbuzz (old)
 | |
| // New HarfBuzz uses /src/hb-ucdn/ucdn.c and unicodedata_db.h for full method of decomposition for all characters
 | |
| // Should always fully decompose and then recompose back, but we will just do the split matras
 | |
/**
 * Decompose a split vowel matra into its component code points.
 *
 * @param int $ab Unicode code point to decompose
 * @return array|false list of two or three code points in order, or false when
 *                     $ab is not one of the characters handled here
 */
public static function decompose_indic($ab) {
	switch ($ab) {
		/* bengali */
		case 0x9cb:	return array(0x9c7, 0x9be);
		case 0x9cc:	return array(0x9c7, 0x9d7);

		/* oriya */
		case 0xb48:	return array(0xb47, 0xb56);
		case 0xb4b:	return array(0xb47, 0xb3e);
		case 0xb4c:	return array(0xb47, 0xb57);

		/* tamil */
		case 0xbca:	return array(0xbc6, 0xbbe);
		case 0xbcb:	return array(0xbc7, 0xbbe);
		case 0xbcc:	return array(0xbc6, 0xbd7);

		/* telugu */
		case 0xc48:	return array(0xc46, 0xc56);

		/* kannada */
		case 0xcc0:	return array(0xcbf, 0xcd5);
		case 0xcc7:	return array(0xcc6, 0xcd5);
		case 0xcc8:	return array(0xcc6, 0xcd6);
		case 0xcca:	return array(0xcc6, 0xcc2);
		case 0xccb:	return array(0xcc6, 0xcc2, 0xcd5);

		/* malayalam */
		case 0xd4a:	return array(0xd46, 0xd3e);
		case 0xd4b:	return array(0xd47, 0xd3e);
		case 0xd4c:	return array(0xd46, 0xd57);

		/* sinhala */
		// NB Some fonts break with these Sinhala decompositions (although this is the Uniscribe spec).
		// One option is to decompose only when the character would be substituted by pstf,
		// e.g. if (isset($GSUBdata['pstf'][$ab])) - $GSUBdata would then have to be passed in as a parameter.
		case 0xdda:	return array(0xdd9, 0xdca);
		case 0xddc:	return array(0xdd9, 0xdcf);
		case 0xddd:	return array(0xdd9, 0xdcf, 0xdca);
		case 0xdde:	return array(0xdd9, 0xddf);

		/* khmer - U+17C1 followed by the original character */
		case 0x17be:	return array(0x17c1, 0x17be);
		case 0x17bf:	return array(0x17c1, 0x17bf);
		case 0x17c0:	return array(0x17c1, 0x17c0);
		case 0x17c4:	return array(0x17c1, 0x17c4);
		case 0x17c5:	return array(0x17c1, 0x17c5);

		/* tibetan - included here although it does not use the Indic shaper in other ways */
		case 0xf73:	return array(0xf71, 0xf72);
		case 0xf75:	return array(0xf71, 0xf74);
		case 0xf76:	return array(0xfb2, 0xf80);
		case 0xf77:	return array(0xfb2, 0xf81);
		case 0xf78:	return array(0xfb3, 0xf80);
		case 0xf79:	return array(0xfb3, 0xf71, 0xf80);
		case 0xf81:	return array(0xf71, 0xf80);
	}
	// Not a split matra handled here.
	return false;
}
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
/**
 * Sort a window of $arr in place by ascending 'indic_position' using a bubble sort.
 * Only adjacent elements that are strictly out of order are exchanged, so elements
 * with equal positions keep their relative order.
 *
 * @param array $arr   list of items, each with an 'indic_position' key (modified in place)
 * @param int   $start index of the first element of the window
 * @param int   $len   number of elements in the window
 * @return void
 */
public static function bubble_sort(&$arr, $start, $len) {
	if ($len < 2) {
		return;
	}
	// $last is the highest index compared against its right-hand neighbour;
	// it shrinks by one after every pass.
	for ($last = $start + $len - 2; $last >= $start; $last--) {
		for ($i = $start; $i <= $last; $i++) {
			$next = $i + 1;
			if ($arr[$i]['indic_position'] > $arr[$next]['indic_position']) {
				$held = $arr[$next];
				$arr[$next] = $arr[$i];
				$arr[$i] = $held;
			}
		}
	}
}
 | |
| 
 | |
| 
 | |
| 
 | |
| 
 | |
| }	// end Class
 | |
| 
 | |
| ?>
 |