HindiFixer.cs 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157
  1. using System.Linq;
  2. namespace I2.Loc
  3. {
  /// <summary>
  /// Applies a small set of character-level rewrites to Devanagari (Hindi) text:
  /// the vowel sign I (U+093F) is moved in front of the character that precedes it,
  /// and certain "character + Nukta" pairs are merged into a single code point.
  /// </summary>
  /// <remarks>
  /// This is not a full Devanagari shaping implementation. Background material:
  /// https://www.microsoft.com/typography/OpenTypeDev/devanagari/intro.htm
  /// https://social.msdn.microsoft.com/Forums/windows/en-US/9883ff08-bd72-499b-9543-ed424167281d/converting-hindi-text-to-english-text?forum=winforms
  /// </remarks>
  public class HindiFixer
  {
      /// <summary>Devanagari sign Nukta (U+093C, decimal 2364).</summary>
      private const char Nukta = '\u093C';

      /// <summary>Devanagari vowel sign I (U+093F, decimal 2367).</summary>
      private const char VowelSignI = '\u093F';

      /// <summary>Placeholder written over a consumed Nukta; stripped from the result.</summary>
      private const char Removed = '\0';

      /// <summary>
      /// Runs a single left-to-right pass over <paramref name="text"/> applying the
      /// vowel-sign reordering and the Nukta compositions.
      /// </summary>
      /// <param name="text">Text to process. May be null or empty.</param>
      /// <returns>
      /// The same <paramref name="text"/> instance when nothing was rewritten (including
      /// null/empty input); otherwise a new string with the rewrites applied. When a
      /// rewrite happens, every U+0000 character is dropped from the result, including
      /// any that were already present in the input.
      /// </returns>
      internal static string Fix(string text)
      {
          if (string.IsNullOrEmpty(text))
          {
              return text;
          }

          char[] chars = text.ToCharArray();
          int last = chars.Length - 1;
          bool changed = false;

          for (int i = 0; i <= last; ++i)
          {
              // Interchange the order of the "i" vowel sign and the character before it.
              // The i > 0 guard matters: a vowel sign at the very start of the text has
              // nothing to swap with and must not index before the array.
              if (i > 0 && chars[i] == VowelSignI)
              {
                  char previous = chars[i - 1];
                  if (previous != Removed && !char.IsWhiteSpace(previous))
                  {
                      chars[i] = previous;
                      chars[i - 1] = VowelSignI;
                      changed = true;
                  }
              }

              // The remaining rule looks one character ahead, so skip the final slot.
              if (i == last)
              {
                  continue;
              }

              // <char> + Nukta collapses to one code point; the Nukta slot is blanked
              // here and removed when the result string is built.
              char composed;
              if (chars[i + 1] == Nukta && TryComposeWithNukta(chars[i], out composed))
              {
                  chars[i] = composed;
                  chars[i + 1] = Removed;
                  changed = true;
              }

              // Not applied (these rules were disabled in the original implementation):
              //   consonant + Halant + Halant + consonant -> consonant + Halant + ZWNJ + consonant
              //   consonant + Halant + Nukta  + consonant -> consonant + Halant + ZWJ  + consonant
          }

          if (!changed)
          {
              return text;
          }

          // Only one pass is made. Note that re-running the pass on its own output
          // would move an already-reordered vowel sign one more position to the left.
          int length = 0;
          for (int i = 0; i < chars.Length; ++i)
          {
              if (chars[i] != Removed)
              {
                  chars[length++] = chars[i];
              }
          }

          return new string(chars, 0, length);
      }

      /// <summary>
      /// Looks up the single code point that replaces <paramref name="c"/> followed by a Nukta.
      /// </summary>
      /// <param name="c">Character that precedes the Nukta.</param>
      /// <param name="composed">Replacement character, or U+0000 when there is none.</param>
      /// <returns>True when <paramref name="c"/> has a Nukta composition.</returns>
      private static bool TryComposeWithNukta(char c, out char composed)
      {
          switch (c)
          {
              case '\u0907': composed = '\u090C'; return true; // letter I           -> letter vocalic L
              case '\u0943': composed = '\u0944'; return true; // vowel sign vocalic R -> vowel sign vocalic RR
              case '\u0901': composed = '\u0950'; return true; // Candrabindu        -> Om
              case '\u090B': composed = '\u0960'; return true; // letter vocalic R   -> letter vocalic RR
              case '\u0908': composed = '\u0961'; return true; // letter II          -> letter vocalic LL
              case '\u093F': composed = '\u0962'; return true; // vowel sign I       -> vowel sign vocalic L
              case '\u0940': composed = '\u0963'; return true; // vowel sign II      -> vowel sign vocalic LL
              case '\u0964': composed = '\u093D'; return true; // Danda              -> sign Avagraha
              default: composed = Removed; return false;
          }
      }
  }
  140. }