// SPDX-FileCopyrightText: 2016-2025 PyThaiNLP Project
// SPDX-FileType: SOURCE
// SPDX-License-Identifier: Apache-2.0

// Thai character sets: the 44 consonant letters and the four tone marks
const thaiConsonants = "กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ";
const thaiToneMarks = "่้๊๋";

// Spelling class groups: class name -> consonants that belong to that class
const spellingClass = {
  "กง": ["ง"],
  "กม": ["ม"],
  "เกย": ["ย"],
  "เกอว": ["ว"],
  "กน": [..."นญณรลฬ"],
  "กก": [..."กขคฆ"],
  "กด": [..."ดจชซฎฏฐฑฒตถทธศษส"],
  "กบ": [..."บปภพฟ"],
};

// Every consonant except อ, as an array of single characters
const thaiConsonantsAll = [...thaiConsonants].filter((ch) => ch !== "อ");
// All classified consonants concatenated into one string, in class order
const temp = Object.values(spellingClass)
  .map((members) => members.join(""))
  .join("");
// Consonants (other than อ) that appear in no spelling class
const notSpellingClass = thaiConsonantsAll.filter(
  (ch) => temp.indexOf(ch) === -1
);

// Short vowels and regex patterns
// `short`: single-character vowel signs that soundSyllable and syllableLength
// look for (via `includes`) when deciding that a syllable is short/dead.
const short = "ะัิึุ";
// `reShort`: matches when the syllable contains เ, แ or โ followed anywhere
// later by ะ. The pattern is not anchored, and the อะ / าะ alternatives are
// special cases already covered by the plain เ…ะ alternative.
const reShort = /(เ.*ะ|แ.*ะ|เ.*อะ|โ.*ะ|เ.*าะ)/u;
// `pattern`: matches when the syllable contains เ followed anywhere later by า.
const pattern = /(เ.*า)/u;

// Final-consonant groups consulted by soundSyllable:
// check1 collects the กง, กน, กม, เกย and เกอว classes (in that order),
// check2 collects the กก, กบ and กด classes (in that order).
const check1 = [
  ...spellingClass["กง"],
  ...spellingClass["กน"],
  ...spellingClass["กม"],
  ...spellingClass["เกย"],
  ...spellingClass["เกอว"],
];
const check2 = ["กก", "กบ", "กด"].flatMap((name) => spellingClass[name]);

// Consonant class sub-groups (each is an array of single characters)
const thaiLowSonorants = [..."งนมยรลว"];
const thaiLowAspirates = [..."คชซทพฟฮ"];
const thaiLowIrregular = [..."ฆญณธภฅฌฑฒฬ"];
const thaiMidPlains = [..."กจดตบปอฎฏ"];
const thaiHighAspirates = [..."ขฉถผฝสห"];
const thaiHighIrregular = [..."ศษฃฐ"];

// Class name -> every consonant in that class
const thaiInitialConsonantType = {
  low: thaiLowSonorants.concat(thaiLowAspirates, thaiLowIrregular),
  mid: thaiMidPlains,
  high: thaiHighAspirates.concat(thaiHighIrregular),
};

// Reverse lookup: consonant -> "low" | "mid" | "high"
const thaiInitialConsonantToType = Object.fromEntries(
  Object.entries(thaiInitialConsonantType).flatMap(([type, consonants]) =>
    consonants.map((consonant) => [consonant, type])
  )
);

// Thai vowels with their length ("long" or "short").
// Multi-character keys list the signs of a compound vowel in writing order
// (e.g. "เาะ" = เ, then า, then ะ). Key order matters: detectVowel sorts the
// keys by length with a stable sort, so keys of equal length are tried in the
// order they are declared here.
const thaiVowels = {
  "เี": { length: "long" },
  // โ…ะ is a short vowel; this agrees with reShort, which also classifies the
  // โ…ะ form as short (it was previously tagged "long" here).
  "โะ": { length: "short" },
  "เาะ": { length: "short" },
  "เา": { length: "short" },
  "า": { length: "long" },
  "ี": { length: "long" },
  "ู": { length: "long" },
  "เ": { length: "long" },
  "แ": { length: "long" },
  "โ": { length: "long" },
  "ไ": { length: "long" },
  "ใ": { length: "long" },
  "ะ": { length: "short" },
  "ิ": { length: "short" },
  "ุ": { length: "short" },
  "ั": { length: "short" },
  "ึ": { length: "short" },
  "ำ": { length: "long" },
  "ื": { length: "long" },
};

/**
 * Returns the first character of the syllable that is a Thai consonant.
 * @param {string} syllable - Thai syllable
 * @returns {string|null} - First consonant found, or null when there is none
 */
function detectInitialConsonant(syllable) {
  for (const ch of syllable.split("")) {
    if (thaiConsonants.includes(ch)) {
      return ch;
    }
  }
  return null;
}

/**
 * Identifies the vowel in a syllable.
 *
 * Candidates are the keys of thaiVowels, tried longest first (ties keep their
 * declaration order). A candidate matches when all of its signs occur in the
 * syllable in the same order, with any characters allowed in between. This is
 * what lets a compound vowel written around the consonant be recognised:
 * "เกีย" matches "เี" and "เกาะ" matches "เาะ". A plain substring test can
 * never match those keys because the consonant sits between the signs.
 *
 * @param {string} syllable - Thai syllable
 * @returns {string|null} - Matching thaiVowels key, or null if none matches
 */
function detectVowel(syllable) {
  // True when every sign of `vowel` appears in `syllable`, left to right.
  const occursInOrder = (vowel) => {
    let searchFrom = 0;
    for (const sign of vowel.split("")) {
      const foundAt = syllable.indexOf(sign, searchFrom);
      if (foundAt === -1) return false;
      searchFrom = foundAt + 1;
    }
    return true;
  };

  // Object.keys returns a fresh array, so sorting does not touch thaiVowels.
  const candidates = Object.keys(thaiVowels).sort(
    (a, b) => b.length - a.length
  );
  for (const vowel of candidates) {
    if (occursInOrder(vowel)) return vowel;
  }
  return null;
}

/**
 * Sound syllable classification (live or dead).
 *
 * The decision uses the last consonant of the syllable (อ is never counted,
 * because thaiConsonantsAll excludes it), which vowel signs are present, and
 * whether that last consonant belongs to check1 or check2.
 *
 * The results are the same as the previous implementation; three pieces of
 * dead code were dropped. Tests against the module-level `pattern` (เ…า) are
 * gone because any syllable matching it contains า, so it is already handled
 * by the า/ี/ื/แ/ู/โ branch. A final branch that returned "dead" on both
 * paths was collapsed, and a duplicated า was removed from the two character
 * classes.
 *
 * @param {string} syllable - Thai syllable
 * @returns {string} - "live" or "dead"
 */
function soundSyllable(syllable) {
  if (syllable.length < 2) return "dead";

  const consonants = syllable
    .split("")
    .filter((c) => thaiConsonantsAll.includes(c));

  // Exactly two characters containing both อ and เ and no other consonant.
  if (
    consonants.length === 0 &&
    syllable.length === 2 &&
    syllable.includes("อ") &&
    syllable.includes("เ")
  ) {
    return "live";
  }

  // undefined when the syllable has no consonant; both lookups are then false.
  const spellingConsonant = consonants[consonants.length - 1];
  const lastChar = syllable[syllable.length - 1];
  const endsInCheck1 = check1.includes(spellingConsonant);
  const endsInCheck2 = check2.includes(spellingConsonant);

  const hasCompoundShort = reShort.test(syllable);
  const hasShortSign =
    hasCompoundShort || short.split("").some((c) => syllable.includes(c));
  // Note: the first class includes เ, the second one does not.
  const hasLongSignOrE = /[าีืแูเโ]/.test(syllable);
  const hasLongSign = /[าีืแูโ]/.test(syllable);
  const hasAmOrAi = /[ำใไ]/.test(syllable);

  // A check2 final with none of the signs above is dead straight away.
  if (endsInCheck2 && !hasLongSignOrE && !hasAmOrAi) return "dead";

  if (hasLongSign) {
    if (!hasCompoundShort) {
      if (endsInCheck1) return "live";
      // The syllable does not end on its last consonant.
      if (spellingConsonant !== lastChar) return "live";
    }
    if (endsInCheck2) return "dead";
    return hasShortSign ? "dead" : "live";
  }

  if (hasAmOrAi) return "live";

  if (endsInCheck1) {
    if (hasShortSign && consonants.length < 2) return "dead";
    return short.includes(lastChar) ? "dead" : "live";
  }

  return "dead";
}

/**
 * Classifies a syllable as open or closed by counting its consonants.
 * Fewer than two consonants is "open"; exactly two with อ as the second is
 * also "open"; everything else is "close".
 * @param {string} syllable - Thai syllable
 * @returns {string} - "open" or "close"
 */
function syllableOpenCloseDetector(syllable) {
  const found = [];
  for (const ch of syllable.split("")) {
    if (thaiConsonants.includes(ch)) found.push(ch);
  }

  const secondIsOAng = found.length === 2 && found[1] === "อ";
  return found.length < 2 || secondIsOAng ? "open" : "close";
}

/**
 * Determines syllable length (long or short).
 * A syllable is "short" when it has at most three consonants and contains
 * one of the `short` vowel signs, or when it matches `reShort`; otherwise it
 * is "long".
 * @param {string} syllable - Thai syllable
 * @returns {string} - "long" or "short"
 */
function syllableLength(syllable) {
  let consonantCount = 0;
  for (const ch of syllable.split("")) {
    if (thaiConsonants.includes(ch)) consonantCount += 1;
  }

  const hasShortSign = short
    .split("")
    .some((sign) => syllable.includes(sign));

  if (consonantCount <= 3 && hasShortSign) {
    return "short";
  }
  return reShort.test(syllable) ? "short" : "long";
}

/**
 * Finds the first tone mark written in the syllable.
 * @param {string} syllable - Thai syllable
 * @returns {string} - The tone mark character, or "" when there is none
 */
function toneMarkDetector(syllable) {
  for (const ch of syllable.split("")) {
    if (thaiToneMarks.includes(ch)) {
      return ch;
    }
  }
  return "";
}

/**
 * Checks whether the last low sonorant in the syllable is also its last or
 * second-to-last consonant.
 * @param {string} syllable - Thai syllable
 * @returns {boolean} - false when the syllable has no sonorant or no consonant
 */
function checkSonorantSyllable(syllable) {
  const chars = syllable.split("");

  // Scan from the end to pick up the final sonorant.
  const lastSonorant = [...chars]
    .reverse()
    .find((ch) => thaiLowSonorants.includes(ch));
  if (lastSonorant === undefined) return false;

  const lastTwoConsonants = chars
    .filter((ch) => thaiConsonants.includes(ch))
    .slice(-2);
  return lastTwoConsonants.includes(lastSonorant);
}

/**
 * Runs every detector on a Thai syllable and collects the results.
 * @param {string} syllable - Thai syllable
 * @returns {object} - The syllable plus its sound, open/close state, length,
 *   tone, vowel, vowel length, initial consonant and consonant class
 */
export function analyzeSyllable(syllable) {
  const result = { syllable };

  result.sound = soundSyllable(syllable); // "live" or "dead"
  result.openClose = syllableOpenCloseDetector(syllable); // "open" or "close"
  result.length = syllableLength(syllable); // overall length: "long" or "short"
  result.tone = toneDetector(syllable); // "l", "m", "r", "f", "h", or ""

  // The matched thaiVowels key and that vowel's own length (null if no vowel).
  result.vowel = detectVowel(syllable);
  result.vowelLength = null;
  if (result.vowel) {
    result.vowelLength = thaiVowels[result.vowel].length;
  }

  // First consonant and its class: "low", "mid", "high", or null if unknown.
  result.initialConsonant = detectInitialConsonant(syllable);
  result.consonantClass = null;
  if (result.initialConsonant) {
    result.consonantClass =
      thaiInitialConsonantToType[result.initialConsonant] || null;
  }

  return result;
}

/**
 * Tone decided by the special rules for a syllable whose first consonant is
 * อ or ห and that has more than one consonant.
 * @param {string} initialConsonant - "อ" or "ห"
 * @param {boolean} endsInSonorant - Result of checkSonorantSyllable
 * @param {string} sound - "live" or "dead"
 * @param {string} toneMark - Tone mark character or ""
 * @returns {string} - Tone letter, or "" when no special rule applies
 */
function leadingConsonantTone(initialConsonant, endsInSonorant, sound, toneMark) {
  if (!endsInSonorant) return "";
  if (sound === "dead") return "l";
  if (sound !== "live") return "";
  if (toneMark === "่") return "l";
  // The remaining special rules only exist for a leading ห.
  if (initialConsonant !== "ห") return "";
  return toneMark === "้" ? "f" : "r";
}

/**
 * Tone decided by the class of the initial consonant. Within each class the
 * rules are tried in the same order as in the original single if/else chain.
 * @param {string} consonantClass - "low", "mid", "high", or "" if unknown
 * @param {string} sound - "live" or "dead"
 * @param {string} toneMark - Tone mark character or ""
 * @param {string} openClose - "open" or "close"
 * @param {string} length - "long" or "short"
 * @returns {string} - Tone letter, or "" when no rule applies
 */
function consonantClassTone(consonantClass, sound, toneMark, openClose, length) {
  const isLive = sound === "live";
  const isDead = sound === "dead";

  switch (consonantClass) {
    case "low":
      if (toneMark === "้") return "h";
      if (toneMark === "่") return "f";
      if (openClose === "close" && isDead) {
        if (length === "short") return "h";
        if (length === "long") return "f";
      }
      if (length === "short" && openClose === "open") return "h";
      return isLive ? "m" : "";
    case "mid":
      if (isLive && toneMark === "่") return "l";
      if (toneMark === "๋") return "r";
      if (toneMark === "๊") return "h";
      if (toneMark === "้") return "f";
      if (isDead) return "l";
      return isLive ? "m" : "";
    case "high":
      if (isLive && toneMark === "่") return "l";
      if (toneMark === "้") return "f";
      if (isDead) return "l";
      return isLive ? "r" : "";
    default:
      return "";
  }
}

/**
 * Detects the tone of a Thai syllable.
 *
 * Syllables that start with อ or ห and contain more than one consonant are
 * first checked against the special leading-consonant rules. When none of
 * those rules applies, the ordinary consonant-class rules are used.
 * Previously such syllables returned "" (for example "หก", "อาย", "อ้วน").
 *
 * @param {string} syllable - Thai syllable
 * @returns {string} - Tone: "l" (low), "m" (mid), "r" (rising), "f" (falling),
 *   "h" (high), or "" when the syllable has no consonant or no rule applies
 */
function toneDetector(syllable) {
  const sound = soundSyllable(syllable);
  const consonants = syllable
    .split("")
    .filter((c) => thaiConsonants.includes(c));
  if (!consonants.length) return "";

  const initialConsonant = consonants[0];
  const toneMark = toneMarkDetector(syllable);

  if (consonants.length > 1 && ["อ", "ห"].includes(initialConsonant)) {
    const special = leadingConsonantTone(
      initialConsonant,
      checkSonorantSyllable(syllable),
      sound,
      toneMark
    );
    if (special) return special;
    // No special rule matched: fall through to the class-based rules.
  }

  return consonantClassTone(
    thaiInitialConsonantToType[initialConsonant] || "",
    sound,
    toneMark,
    syllableOpenCloseDetector(syllable),
    syllableLength(syllable)
  );
}

// Example usage
// console.log(analyzeSyllable("มา"));
// {
//   syllable: "มา", sound: "live", openClose: "open", length: "long", tone: "m",
//   vowel: "า", vowelLength: "long", initialConsonant: "ม", consonantClass: "low"
// }
// console.log(analyzeSyllable("ไม้"));
// {
//   syllable: "ไม้", sound: "live", openClose: "open", length: "long", tone: "h",
//   vowel: "ไ", vowelLength: "long", initialConsonant: "ม", consonantClass: "low"
// }
// console.log(analyzeSyllable("เกีย"));
// {
//   syllable: "เกีย", sound: "live", openClose: "close", length: "long", tone: "m",
//   vowel: "เี", vowelLength: "long", initialConsonant: "ก", consonantClass: "mid"
// }
// console.log(analyzeSyllable("คะ"));
// {
//   syllable: "คะ", sound: "dead", openClose: "open", length: "short", tone: "h",
//   vowel: "ะ", vowelLength: "short", initialConsonant: "ค", consonantClass: "low"
// }
