// RTLFixer.cs (44 KB)
  1. using System;
  2. using System.Collections.Generic;
  3. namespace I2.Loc
  4. {
  /// <summary>
  /// Public entry points that prepare right-to-left text for display by delegating
  /// each line to <c>RTLFixerTool.FixLine</c>.
  /// </summary>
  public class RTLFixer
  {
      // "\r\n" and "\n" are both accepted as line terminators; a lone '\r' is left in place.
      private static readonly string[] LineSeparators = { "\r\n", "\n" };

      /// <summary>
      /// Fixes the specified string with the default options
      /// (tashkeel hidden, Hindu numerals enabled).
      /// </summary>
      /// <param name='str'>
      /// String to be fixed.
      /// </param>
      public static string Fix(string str)
      {
          return Fix(str, false, true);
      }

      /// <summary>
      /// Fixes the specified string, optionally leaving Latin words untouched.
      /// </summary>
      /// <param name='str'>
      /// String to be fixed.
      /// </param>
      /// <param name='rtl'>
      /// When true the whole string is passed to <see cref="Fix(string)"/>. When false the
      /// string is split on spaces: a word whose middle character lower-cases to a
      /// lowercase letter is copied through unchanged, and the runs of words between
      /// such words are fixed as a unit.
      /// </param>
      /// <returns>
      /// The fixed string. A null or empty input is returned unchanged.
      /// </returns>
      public static string Fix(string str, bool rtl)
      {
          if (rtl)
          {
              return Fix(str);
          }

          // Nothing to split; also avoids indexing into an empty word below.
          if (string.IsNullOrEmpty(str))
          {
              return str;
          }

          string[] words = str.Split(' ');
          var result = new System.Text.StringBuilder(str.Length + 1);
          var arabicToIgnore = new System.Text.StringBuilder();

          foreach (string word in words)
          {
              // Consecutive spaces produce empty words; they have no middle character,
              // so they are kept with the pending run to preserve spacing.
              if (word.Length > 0 && char.IsLower(word.ToLower()[word.Length / 2]))
              {
                  result.Append(Fix(arabicToIgnore.ToString())).Append(word).Append(' ');
                  arabicToIgnore.Length = 0;
              }
              else
              {
                  arabicToIgnore.Append(word).Append(' ');
              }
          }

          if (arabicToIgnore.Length > 0)
          {
              result.Append(Fix(arabicToIgnore.ToString()));
          }

          return result.ToString();
      }

      /// <summary>
      /// Fixes the specified string with customization options.
      /// </summary>
      /// <param name='str'>
      /// String to be fixed.
      /// </param>
      /// <param name='showTashkeel'>
      /// Show tashkeel.
      /// </param>
      /// <param name='useHinduNumbers'>
      /// Use hindu numbers.
      /// </param>
      /// <returns>
      /// The result of <c>HindiFixer.Fix</c> when it changed the string; otherwise every
      /// line fixed individually and re-joined with <see cref="Environment.NewLine"/>.
      /// </returns>
      public static string Fix(string str, bool showTashkeel, bool useHinduNumbers)
      {
          var newStr = HindiFixer.Fix(str);
          if (newStr != str)
          {
              return newStr;
          }

          // These options are static on RTLFixerTool and are read by FixLine.
          RTLFixerTool.showTashkeel = showTashkeel;
          RTLFixerTool.useHinduNumbers = useHinduNumbers;

          if (str.IndexOf('\n') < 0)
          {
              return RTLFixerTool.FixLine(str);
          }

          // Split on "\r\n" as well as "\n" so that text that already uses CRLF does not
          // end up with a stray '\r' glued to every line.
          string[] lines = str.Split(LineSeparators, StringSplitOptions.None);
          for (int i = 0; i < lines.Length; i++)
          {
              lines[i] = RTLFixerTool.FixLine(lines[i]);
          }

          return string.Join(Environment.NewLine, lines);
      }
  }
  /// <summary>
  /// Isolated contextual forms of the supported letters, taken from the Unicode
  /// Arabic Presentation Forms blocks (0xFBxx / 0xFExx). These are the values the
  /// conversion table maps the general letters to.
  /// </summary>
  internal enum IsolatedArabicLetters : int
  {
      // Hamza and its carriers
      Hamza = 0xFE80,
      Alef = 0xFE8D,
      AlefHamza = 0xFE83,
      WawHamza = 0xFE85,
      AlefMaksoor = 0xFE87,   // alef with hamza below
      AlefMaksora = 0xFBFC,   // code point of FARSI YEH ISOLATED FORM
      HamzaNabera = 0xFE89,

      // Basic alphabet
      Ba = 0xFE8F,
      Ta = 0xFE95,
      Tha2 = 0xFE99,
      Jeem = 0xFE9D,
      H7aa = 0xFEA1,
      Khaa2 = 0xFEA5,
      Dal = 0xFEA9,
      Thal = 0xFEAB,
      Ra2 = 0xFEAD,
      Zeen = 0xFEAF,
      Seen = 0xFEB1,
      Sheen = 0xFEB5,
      S9a = 0xFEB9,
      Dha = 0xFEBD,
      T6a = 0xFEC1,
      T6ha = 0xFEC5,
      Ain = 0xFEC9,
      Gain = 0xFECD,
      Fa = 0xFED1,
      Gaf = 0xFED5,           // qaf
      Kaf = 0xFED9,
      Lam = 0xFEDD,
      Meem = 0xFEE1,
      Noon = 0xFEE5,
      Ha = 0xFEE9,
      Waw = 0xFEED,
      Ya = 0xFEF1,
      AlefMad = 0xFE81,
      TaMarboota = 0xFE93,

      // Persian letters
      PersianPe = 0xFB56,
      PersianChe = 0xFB7A,
      PersianZe = 0xFB8A,
      PersianGaf = 0xFB92,
      PersianGaf2 = 0xFB8E
  }
  /// <summary>
  /// General (position-independent) code points of the supported letters, taken from
  /// the Unicode Arabic block (0x06xx). These are the values the conversion table
  /// looks up in incoming text.
  /// </summary>
  internal enum GeneralArabicLetters : int
  {
      // Hamza and its carriers
      Hamza = 0x0621,
      Alef = 0x0627,
      AlefHamza = 0x0623,
      WawHamza = 0x0624,
      AlefMaksoor = 0x0625,   // alef with hamza below
      AlefMagsora = 0x0649,   // alef maksura
      HamzaNabera = 0x0626,

      // Basic alphabet
      Ba = 0x0628,
      Ta = 0x062A,
      Tha2 = 0x062B,
      Jeem = 0x062C,
      H7aa = 0x062D,
      Khaa2 = 0x062E,
      Dal = 0x062F,
      Thal = 0x0630,
      Ra2 = 0x0631,
      Zeen = 0x0632,
      Seen = 0x0633,
      Sheen = 0x0634,
      S9a = 0x0635,
      Dha = 0x0636,
      T6a = 0x0637,
      T6ha = 0x0638,
      Ain = 0x0639,
      Gain = 0x063A,
      Fa = 0x0641,
      Gaf = 0x0642,           // qaf
      Kaf = 0x0643,
      Lam = 0x0644,
      Meem = 0x0645,
      Noon = 0x0646,
      Ha = 0x0647,
      Waw = 0x0648,
      Ya = 0x064A,
      AlefMad = 0x0622,
      TaMarboota = 0x0629,

      // Persian letters
      PersianPe = 0x067E,
      PersianChe = 0x0686,
      PersianZe = 0x0698,
      PersianGaf = 0x06AF,
      PersianGaf2 = 0x06A9
  }
  /// <summary>
  /// A single conversion entry: the code point to look for and the code point
  /// that replaces it.
  /// </summary>
  internal class ArabicMapping
  {
      /// <summary>Code point that is matched against the input.</summary>
      public int from;

      /// <summary>Code point returned when <see cref="from"/> matches.</summary>
      public int to;

      /// <summary>
      /// Creates an entry that converts <paramref name="from"/> into <paramref name="to"/>.
      /// </summary>
      public ArabicMapping(int from, int to)
      {
          this.to = to;
          this.from = from;
      }
  }
  /// <summary>
  /// Conversion table from general Arabic/Persian letters to their isolated
  /// presentation forms.
  /// </summary>
  /// <remarks>
  /// The table and the shared instance are static readonly fields, so they are built
  /// once by the type initializer and each lookup is a single dictionary probe instead
  /// of a scan over every mapping.
  /// </remarks>
  internal class ArabicTable
  {
      // Declared before arabicMapper so the table exists before the instance does.
      private static readonly Dictionary<int, int> isolatedByGeneral = BuildTable();
      private static readonly ArabicTable arabicMapper = new ArabicTable();

      /// <summary>
      /// Private so that <see cref="ArabicMapper"/> is the only way to obtain an instance.
      /// </summary>
      private ArabicTable()
      {
      }

      /// <summary>
      /// Gets the shared mapper instance.
      /// </summary>
      internal static ArabicTable ArabicMapper
      {
          get { return arabicMapper; }
      }

      /// <summary>
      /// Converts a general letter to its isolated presentation form.
      /// </summary>
      /// <param name="toBeConverted">Code point to convert.</param>
      /// <returns>
      /// The mapped code point, or <paramref name="toBeConverted"/> unchanged when the
      /// table has no entry for it.
      /// </returns>
      internal int Convert(int toBeConverted)
      {
          int converted;
          return isolatedByGeneral.TryGetValue(toBeConverted, out converted) ? converted : toBeConverted;
      }

      // Registers one general -> isolated pair in the table being built.
      private static void Map(Dictionary<int, int> table, GeneralArabicLetters general, IsolatedArabicLetters isolated)
      {
          table.Add((int)general, (int)isolated);
      }

      // Builds the complete general -> isolated lookup table.
      private static Dictionary<int, int> BuildTable()
      {
          var table = new Dictionary<int, int>(41);

          Map(table, GeneralArabicLetters.Hamza, IsolatedArabicLetters.Hamza);
          Map(table, GeneralArabicLetters.Alef, IsolatedArabicLetters.Alef);
          Map(table, GeneralArabicLetters.AlefHamza, IsolatedArabicLetters.AlefHamza);
          Map(table, GeneralArabicLetters.WawHamza, IsolatedArabicLetters.WawHamza);
          Map(table, GeneralArabicLetters.AlefMaksoor, IsolatedArabicLetters.AlefMaksoor);
          // The two enums spell this member differently (AlefMagsora / AlefMaksora).
          Map(table, GeneralArabicLetters.AlefMagsora, IsolatedArabicLetters.AlefMaksora);
          Map(table, GeneralArabicLetters.HamzaNabera, IsolatedArabicLetters.HamzaNabera);
          Map(table, GeneralArabicLetters.Ba, IsolatedArabicLetters.Ba);
          Map(table, GeneralArabicLetters.Ta, IsolatedArabicLetters.Ta);
          Map(table, GeneralArabicLetters.Tha2, IsolatedArabicLetters.Tha2);
          Map(table, GeneralArabicLetters.Jeem, IsolatedArabicLetters.Jeem);
          Map(table, GeneralArabicLetters.H7aa, IsolatedArabicLetters.H7aa);
          Map(table, GeneralArabicLetters.Khaa2, IsolatedArabicLetters.Khaa2);
          Map(table, GeneralArabicLetters.Dal, IsolatedArabicLetters.Dal);
          Map(table, GeneralArabicLetters.Thal, IsolatedArabicLetters.Thal);
          Map(table, GeneralArabicLetters.Ra2, IsolatedArabicLetters.Ra2);
          Map(table, GeneralArabicLetters.Zeen, IsolatedArabicLetters.Zeen);
          Map(table, GeneralArabicLetters.Seen, IsolatedArabicLetters.Seen);
          Map(table, GeneralArabicLetters.Sheen, IsolatedArabicLetters.Sheen);
          Map(table, GeneralArabicLetters.S9a, IsolatedArabicLetters.S9a);
          Map(table, GeneralArabicLetters.Dha, IsolatedArabicLetters.Dha);
          Map(table, GeneralArabicLetters.T6a, IsolatedArabicLetters.T6a);
          Map(table, GeneralArabicLetters.T6ha, IsolatedArabicLetters.T6ha);
          Map(table, GeneralArabicLetters.Ain, IsolatedArabicLetters.Ain);
          Map(table, GeneralArabicLetters.Gain, IsolatedArabicLetters.Gain);
          Map(table, GeneralArabicLetters.Fa, IsolatedArabicLetters.Fa);
          Map(table, GeneralArabicLetters.Gaf, IsolatedArabicLetters.Gaf);
          Map(table, GeneralArabicLetters.Kaf, IsolatedArabicLetters.Kaf);
          Map(table, GeneralArabicLetters.Lam, IsolatedArabicLetters.Lam);
          Map(table, GeneralArabicLetters.Meem, IsolatedArabicLetters.Meem);
          Map(table, GeneralArabicLetters.Noon, IsolatedArabicLetters.Noon);
          Map(table, GeneralArabicLetters.Ha, IsolatedArabicLetters.Ha);
          Map(table, GeneralArabicLetters.Waw, IsolatedArabicLetters.Waw);
          Map(table, GeneralArabicLetters.Ya, IsolatedArabicLetters.Ya);
          Map(table, GeneralArabicLetters.AlefMad, IsolatedArabicLetters.AlefMad);
          Map(table, GeneralArabicLetters.TaMarboota, IsolatedArabicLetters.TaMarboota);

          // Persian letters
          Map(table, GeneralArabicLetters.PersianPe, IsolatedArabicLetters.PersianPe);
          Map(table, GeneralArabicLetters.PersianChe, IsolatedArabicLetters.PersianChe);
          Map(table, GeneralArabicLetters.PersianZe, IsolatedArabicLetters.PersianZe);
          Map(table, GeneralArabicLetters.PersianGaf, IsolatedArabicLetters.PersianGaf);
          Map(table, GeneralArabicLetters.PersianGaf2, IsolatedArabicLetters.PersianGaf2);

          return table;
      }
  }
  /// <summary>
  /// A diacritic (tashkeel) character together with the position it belongs to, so
  /// that it can be re-inserted after the letters have been processed.
  /// </summary>
  internal class TashkeelLocation
  {
      /// <summary>The diacritic character.</summary>
      public char tashkeel;

      /// <summary>Position that is compared against the output cursor when marks are re-inserted.</summary>
      public int position;

      /// <summary>
      /// Creates a record for <paramref name="tashkeel"/> at <paramref name="position"/>.
      /// </summary>
      public TashkeelLocation(char tashkeel, int position)
      {
          this.position = position;
          this.tashkeel = tashkeel;
      }
  }
  282. internal class RTLFixerTool
  283. {
  284. internal static bool showTashkeel = true;
  285. internal static bool useHinduNumbers;
  /// <summary>
  /// Strips the diacritics (tashkeel, U+064B..U+0653) from <paramref name="str"/> and
  /// records them so that they can be put back later.
  /// </summary>
  /// <param name="str">Text that may contain diacritics.</param>
  /// <param name="tashkeelLocation">
  /// Receives the removed marks in order of appearance. A shadda that is directly next
  /// to a fatha, damma or kasra (in either order) is recorded as one combined ligature
  /// U+FC60, U+FC61 or U+FC62. Each position is the index the mark takes in the text
  /// once the recorded marks are re-inserted between the remaining letters.
  /// </param>
  /// <returns>
  /// <paramref name="str"/> without the characters U+064B..U+0653 and U+FC60..U+FC62.
  /// </returns>
  internal static string RemoveTashkeel(string str, out List<TashkeelLocation> tashkeelLocation)
  {
      tashkeelLocation = new List<TashkeelLocation>();
      var stripped = new System.Text.StringBuilder(str.Length);

      // Number of source characters so far that produce no output character at all
      // (marks folded into a ligature, and pre-composed ligatures that are dropped).
      // Subtracting it keeps later positions aligned with the re-assembled text.
      int dropped = 0;

      // Source index of the most recent mark; -2 means "none yet".
      int lastMarkIndex = -2;

      for (int i = 0; i < str.Length; i++)
      {
          char current = str[i];

          // Pre-composed shadda ligatures are removed from the text but not recorded.
          if (current >= (char)0xFC60 && current <= (char)0xFC62)
          {
              dropped++;
              continue;
          }

          bool isMark = current >= (char)0x064B && current <= (char)0x0653;
          if (!isMark)
          {
              stripped.Append(current);
              continue;
          }

          // Only marks that sit on the same letter (directly adjacent in the source)
          // may be combined; otherwise a shadda would migrate to an earlier letter.
          if (lastMarkIndex == i - 1 && tashkeelLocation.Count > 0)
          {
              TashkeelLocation previous = tashkeelLocation[tashkeelLocation.Count - 1];
              char ligature = ShaddaLigature(previous.tashkeel, current);
              if (ligature != '\0')
              {
                  previous.tashkeel = ligature;
                  dropped++;
                  lastMarkIndex = i;
                  continue;
              }
          }

          tashkeelLocation.Add(new TashkeelLocation(current, i - dropped));
          lastMarkIndex = i;
      }

      return stripped.ToString();
  }

  // Returns the combined shadda+vowel ligature for two marks (in either order), or '\0' if they do not combine.
  private static char ShaddaLigature(char first, char second)
  {
      const char Shadda = (char)0x0651;

      char vowel;
      if (first == Shadda)
          vowel = second;
      else if (second == Shadda)
          vowel = first;
      else
          return '\0';

      switch ((int)vowel)
      {
          case 0x064E: return (char)0xFC60; // shadda with fatha
          case 0x064F: return (char)0xFC61; // shadda with damma
          case 0x0650: return (char)0xFC62; // shadda with kasra
          default: return '\0';
      }
  }
  /// <summary>
  /// Re-inserts previously removed diacritics between the given letters.
  /// </summary>
  /// <param name="letters">Letters without diacritics.</param>
  /// <param name="tashkeelLocation">Marks to insert, with the positions they belong to.</param>
  /// <returns>
  /// A new array of length <c>letters.Length + tashkeelLocation.Count</c>. A mark whose
  /// position never equals the write cursor is not placed, in which case the unused
  /// tail of the array keeps its default '\0' characters.
  /// </returns>
  internal static char[] ReturnTashkeel(char[] letters, List<TashkeelLocation> tashkeelLocation)
  {
      char[] combined = new char[letters.Length + tashkeelLocation.Count];
      int cursor = 0;

      foreach (char letter in letters)
      {
          combined[cursor++] = letter;

          // Every mark is checked after every letter; the cursor moves while marks are
          // being placed, so consecutive marks listed in order are placed in one pass.
          for (int m = 0; m < tashkeelLocation.Count; m++)
          {
              TashkeelLocation mark = tashkeelLocation[m];
              if (mark.position != cursor)
                  continue;

              combined[cursor++] = mark.tashkeel;
          }
      }

      return combined;
  }
  /// <summary>
  /// Converts a single line into a form that displays correctly as Arabic text when
  /// rendered left-to-right: letters are replaced by their contextual presentation
  /// forms and the character order is reversed, while runs of Latin letters, digits,
  /// symbols and punctuation keep their internal order.
  /// </summary>
  /// <param name="str">Line to be converted. Example: "Aaa"</param>
  /// <returns>Converted string. Example: "aa aaa A" without the spaces.</returns>
  /// <remarks>
  /// Reads the static options <c>showTashkeel</c> and <c>useHinduNumbers</c>.
  /// </remarks>
  internal static string FixLine(string str)
  {
      List<TashkeelLocation> tashkeelLocation;
      string originString = RemoveTashkeel(str, out tashkeelLocation);

      // lettersOrigin holds the isolated forms used for the joining analysis;
      // lettersFinal starts as the plain text and receives the shaped result.
      char[] lettersOrigin = originString.ToCharArray();
      char[] lettersFinal = originString.ToCharArray();

      for (int i = 0; i < lettersOrigin.Length; i++)
      {
          lettersOrigin[i] = (char)ArabicTable.ArabicMapper.Convert(lettersOrigin[i]);
      }

      for (int i = 0; i < lettersOrigin.Length; i++)
      {
          bool skip = false;

          // Lam followed by an alef variant collapses into a single lam-alef ligature.
          if (lettersOrigin[i] == (char)IsolatedArabicLetters.Lam && i < lettersOrigin.Length - 1)
          {
              // Base value for the ligature; the +1/+2/+3 offsets below are added to it.
              char ligatureBase = '\0';
              switch ((int)lettersOrigin[i + 1])
              {
                  case (int)IsolatedArabicLetters.AlefMaksoor:
                      ligatureBase = (char)0xFEF7;
                      break;
                  case (int)IsolatedArabicLetters.Alef:
                      ligatureBase = (char)0xFEF9;
                      break;
                  case (int)IsolatedArabicLetters.AlefHamza:
                      ligatureBase = (char)0xFEF5;
                      break;
                  case (int)IsolatedArabicLetters.AlefMad:
                      ligatureBase = (char)0xFEF3;
                      break;
              }

              if (ligatureBase != '\0')
              {
                  lettersOrigin[i] = ligatureBase;
                  // 0xFFFF marks the consumed alef; it is dropped when the output is assembled.
                  lettersFinal[i + 1] = (char)0xFFFF;
                  skip = true;
              }
          }

          if (!IsIgnoredCharacter(lettersOrigin[i]))
          {
              // Offsets from the isolated form: +1, +2 and +3 select the finishing,
              // leading and middle forms respectively.
              if (IsMiddleLetter(lettersOrigin, i))
                  lettersFinal[i] = (char)(lettersOrigin[i] + 3);
              else if (IsFinishingLetter(lettersOrigin, i))
                  lettersFinal[i] = (char)(lettersOrigin[i] + 1);
              else if (IsLeadingLetter(lettersOrigin, i))
                  lettersFinal[i] = (char)(lettersOrigin[i] + 2);
          }

          // Step over the alef that was merged into the ligature.
          if (skip)
              i++;

          // Replace ASCII digits with Arabic-Indic digits (U+0660..U+0669).
          if (useHinduNumbers && lettersOrigin[i] >= '0' && lettersOrigin[i] <= '9')
              lettersFinal[i] = (char)(0x0660 + (lettersOrigin[i] - '0'));
      }

      // Return the tashkeel to their places.
      if (showTashkeel)
          lettersFinal = ReturnTashkeel(lettersFinal, tashkeelLocation);

      // list is the output in visual order; numberList buffers a run of characters
      // that must keep its original order and is flushed when the run ends.
      List<char> list = new List<char>(lettersFinal.Length);
      List<char> numberList = new List<char>();

      for (int i = lettersFinal.Length - 1; i >= 0; i--)
      {
          char current = lettersFinal[i];
          bool hasBothNeighbours = i > 0 && i < lettersFinal.Length - 1;

          if (char.IsPunctuation(current) && hasBothNeighbours &&
              (char.IsPunctuation(lettersFinal[i - 1]) || char.IsPunctuation(lettersFinal[i + 1])))
          {
              // Punctuation next to other punctuation is emitted in place, mirrored.
              char mirrored = MirrorBracket(current);
              if (mirrored != current)
                  list.Add(mirrored);
              else if (current != 0xFFFF)
                  list.Add(current);
          }
          // For cases where english words and arabic are mixed. This allows for using arabic, english and numbers in one sentence.
          else if (current == ' ' && hasBothNeighbours &&
              IsLatinLetterOrNumber(lettersFinal[i - 1]) && IsLatinLetterOrNumber(lettersFinal[i + 1]))
          {
              numberList.Add(current);
          }
          else if (char.IsNumber(current) || char.IsLower(current) ||
              char.IsUpper(current) || char.IsSymbol(current) ||
              char.IsPunctuation(current))
          {
              // NOTE(review): square brackets go straight to the output while the other
              // brackets join the pending run; kept as-is because callers may rely on it.
              if (current == '[' || current == ']')
                  list.Add(MirrorBracket(current));
              else
                  numberList.Add(MirrorBracket(current));
          }
          else if (char.IsSurrogate(current))
          {
              // Surrogate halves stay in the pending run so a pair keeps its order.
              numberList.Add(current);
          }
          else
          {
              FlushPendingRun(numberList, list);
              if (current != 0xFFFF)
                  list.Add(current);
          }
      }

      FlushPendingRun(numberList, list);

      return new string(list.ToArray());
  }

  // Returns the opposite bracket for ( ) < > [ ], or the character itself for anything else.
  private static char MirrorBracket(char ch)
  {
      switch (ch)
      {
          case '(': return ')';
          case ')': return '(';
          case '<': return '>';
          case '>': return '<';
          case '[': return ']';
          case ']': return '[';
          default: return ch;
      }
  }

  // True for lowercase letters, uppercase letters and numbers.
  private static bool IsLatinLetterOrNumber(char ch)
  {
      return char.IsLower(ch) || char.IsUpper(ch) || char.IsNumber(ch);
  }

  // Appends the pending run to the output in reverse collection order and empties it.
  private static void FlushPendingRun(List<char> pending, List<char> output)
  {
      for (int j = pending.Count - 1; j >= 0; j--)
          output.Add(pending[j]);
      pending.Clear();
  }
  /// <summary>
  /// Tells whether a character is left alone by the shaping step. Punctuation, numbers,
  /// symbols and cased letters are ignored, as is every character outside the set of
  /// presentation forms this class works with.
  /// </summary>
  /// <param name="ch">The character to be checked for skipping</param>
  /// <returns>True if the character should be ignored, false if it should not be ignored.</returns>
  internal static bool IsIgnoredCharacter(char ch)
  {
      if (char.IsPunctuation(ch) || char.IsNumber(ch) || char.IsSymbol(ch))
          return true;

      if (char.IsLower(ch) || char.IsUpper(ch))
          return true;

      if (ch == 'a' || ch == '>' || ch == '<' || ch == (char)0x061B)
          return true;

      // Presentation forms that live outside the 0xFE70..0xFEFF range but are still shaped.
      switch ((int)ch)
      {
          case 0xFB56: // PersianPe
          case 0xFB7A: // PersianChe
          case 0xFB8A: // PersianZe
          case 0xFB92: // PersianGaf
          case 0xFB8E: // PersianGaf2
          case 0xFBFC: // AlefMaksora
              return false;
      }

      // Anything else is shaped only when it lies within 0xFE70..0xFEFF.
      return ch < (char)0xFE70 || ch > (char)0xFEFF;
  }
  /// <summary>
  /// Checks if the letter at index value is a leading character in Arabic or not.
  /// A letter is leading when the previous character does not join to it, the letter
  /// itself is able to join to the next one, and the next character is one it can join to.
  /// </summary>
  /// <param name="letters">The whole word that contains the character to be checked</param>
  /// <param name="index">The index of the character to be checked</param>
  /// <returns>True if the character at index is a leading character, else, returns false</returns>
  internal static bool IsLeadingLetter(char[] letters, int index)
  {
      // 1) Nothing joins from the left: start of text, a separator, or a letter
      //    from the set below.
      bool previousDoesNotJoin;
      if (index == 0)
      {
          previousDoesNotJoin = true;
      }
      else
      {
          char previous = letters[index - 1];
          switch ((int)previous)
          {
              case ' ':
              case '*': // ??? Remove?
              case 'A': // ??? Remove?
              case '>':
              case '<':
              case (int)IsolatedArabicLetters.Alef:
              case (int)IsolatedArabicLetters.Dal:
              case (int)IsolatedArabicLetters.Thal:
              case (int)IsolatedArabicLetters.Ra2:
              case (int)IsolatedArabicLetters.Zeen:
              case (int)IsolatedArabicLetters.PersianZe:
              case (int)IsolatedArabicLetters.Waw:
              case (int)IsolatedArabicLetters.AlefMad:
              case (int)IsolatedArabicLetters.AlefHamza:
              case (int)IsolatedArabicLetters.AlefMaksoor:
              case (int)IsolatedArabicLetters.WawHamza:
                  previousDoesNotJoin = true;
                  break;
              default:
                  previousDoesNotJoin = char.IsPunctuation(previous);
                  break;
          }
      }

      // 2) The letter itself must be able to take a leading form.
      bool currentCanLead;
      switch ((int)letters[index])
      {
          case ' ':
          case (int)IsolatedArabicLetters.Dal:
          case (int)IsolatedArabicLetters.Thal:
          case (int)IsolatedArabicLetters.Ra2:
          case (int)IsolatedArabicLetters.Zeen:
          case (int)IsolatedArabicLetters.PersianZe:
          case (int)IsolatedArabicLetters.Alef:
          case (int)IsolatedArabicLetters.AlefHamza:
          case (int)IsolatedArabicLetters.AlefMaksoor:
          case (int)IsolatedArabicLetters.AlefMad:
          case (int)IsolatedArabicLetters.WawHamza:
          case (int)IsolatedArabicLetters.Waw:
          case (int)IsolatedArabicLetters.Hamza:
              currentCanLead = false;
              break;
          default:
              currentCanLead = true;
              break;
      }

      // 3) There must be a following character, and it must be a letter that can be joined to.
      bool nextCanJoin = false;
      if (index < letters.Length - 1)
      {
          char next = letters[index + 1];
          bool nextIsBarrier = next == ' '
              || char.IsPunctuation(next)
              || char.IsNumber(next)
              || char.IsSymbol(next)
              || char.IsLower(next)
              || char.IsUpper(next)
              || next == (int)IsolatedArabicLetters.Hamza;
          nextCanJoin = !nextIsBarrier;
      }

      return previousDoesNotJoin && currentCanLead && nextCanJoin;
  }
  /// <summary>
  /// Decides whether the character at <paramref name="index"/> is a finishing character,
  /// based on the character itself and the one immediately before it.
  /// </summary>
  /// <param name="letters">The characters of the text that contains the character to test.</param>
  /// <param name="index">Position in <paramref name="letters"/> of the character to test.</param>
  /// <returns>
  /// True when the tested character is neither a space nor an isolated Hamza, and the
  /// preceding character is not a space, punctuation, '&lt;', '&gt;' or one of the isolated
  /// letters listed in the method body. Always false when <paramref name="index"/> is 0.
  /// </returns>
  internal static bool IsFinishingLetter(char[] letters, int index)
  {
      // The tested character is needed on every path, so it is read first; an invalid
      // index therefore fails on this array access.
      char current = letters[index];

      if (index == 0)
      {
          // There is no preceding character for a finishing letter to attach to.
          return false;
      }

      char previous = letters[index - 1];

      // True when the preceding character is one that a finishing letter cannot follow.
      bool previousRulesOutFinishing =
          previous == ' '
          || previous == (int)IsolatedArabicLetters.Dal
          || previous == (int)IsolatedArabicLetters.Thal
          || previous == (int)IsolatedArabicLetters.Ra2
          || previous == (int)IsolatedArabicLetters.Zeen
          || previous == (int)IsolatedArabicLetters.PersianZe
          || previous == (int)IsolatedArabicLetters.Waw
          || previous == (int)IsolatedArabicLetters.Alef
          || previous == (int)IsolatedArabicLetters.AlefMad
          || previous == (int)IsolatedArabicLetters.AlefHamza
          || previous == (int)IsolatedArabicLetters.AlefMaksoor
          || previous == (int)IsolatedArabicLetters.WawHamza
          || previous == (int)IsolatedArabicLetters.Hamza
          || char.IsPunctuation(previous)
          || previous == '>'
          || previous == '<';

      if (previousRulesOutFinishing)
      {
          return false;
      }

      // A space or an isolated Hamza is never reported as a finishing character.
      return current != ' ' && current != (int)IsolatedArabicLetters.Hamza;
  }
  /// <summary>
  /// Checks whether the letter at <paramref name="index"/> is a middle character, based on
  /// the character itself and its two immediate neighbours.
  /// </summary>
  /// <param name="letters">The characters of the text that contains the character to test.</param>
  /// <param name="index">Position in <paramref name="letters"/> of the character to test.</param>
  /// <returns>
  /// True when all three conditions hold: the tested character is not one of the excluded
  /// isolated letters; the preceding character is not an excluded letter, punctuation,
  /// '&lt;', '&gt;', a space or '*'; and the following character is not a space, a carriage
  /// return, an isolated Hamza, a number, a symbol or punctuation. False otherwise, including
  /// when the character has no neighbour on either side or <paramref name="index"/> is out of range.
  /// </returns>
  internal static bool IsMiddleLetter(char[] letters, int index)
  {
      // A middle letter needs a character on both sides. Checking the bounds once here
      // makes every array access below safe, so no exception handling is required.
      if (index <= 0 || index >= letters.Length - 1)
      {
          return false;
      }

      char previous = letters[index - 1];
      char current = letters[index];
      char next = letters[index + 1];

      bool currentCanBeMiddle = !IsExcludedFromMiddleForm(current);

      bool previousAllowsMiddle =
          !IsExcludedFromMiddleForm(previous)
          && !char.IsPunctuation(previous)
          && previous != '>'
          && previous != '<'
          && previous != ' '
          && previous != '*';

      bool nextAllowsMiddle =
          next != ' '
          && next != '\r'
          && next != (int)IsolatedArabicLetters.Hamza
          && !char.IsNumber(next)
          && !char.IsSymbol(next)
          && !char.IsPunctuation(next);

      return currentCanBeMiddle && previousAllowsMiddle && nextAllowsMiddle;
  }

  /// <summary>
  /// Tells whether <paramref name="letter"/> is one of the isolated letters that
  /// <see cref="IsMiddleLetter"/> accepts neither as the middle character nor as the
  /// character directly before it.
  /// </summary>
  /// <param name="letter">The character to classify.</param>
  /// <returns>True if the character is in the excluded set; otherwise false.</returns>
  private static bool IsExcludedFromMiddleForm(char letter)
  {
      return letter == (int)IsolatedArabicLetters.Alef
          || letter == (int)IsolatedArabicLetters.Dal
          || letter == (int)IsolatedArabicLetters.Thal
          || letter == (int)IsolatedArabicLetters.Ra2
          || letter == (int)IsolatedArabicLetters.Zeen
          || letter == (int)IsolatedArabicLetters.PersianZe
          || letter == (int)IsolatedArabicLetters.Waw
          || letter == (int)IsolatedArabicLetters.AlefMad
          || letter == (int)IsolatedArabicLetters.AlefHamza
          || letter == (int)IsolatedArabicLetters.AlefMaksoor
          || letter == (int)IsolatedArabicLetters.WawHamza
          || letter == (int)IsolatedArabicLetters.Hamza;
  }
  846. }
  847. }