<?php

namespace App\Helpers;

/**
 * Static helpers for repairing, validating and romanising Kurdish / Arabic-script text.
 *
 * Parameters are deliberately left untyped: every public method accepts null or an
 * empty value and hands it straight back, so callers can pass raw model attributes.
 * Requires PHP 7.0+ (\Throwable, "\u{...}" escapes) and the mbstring extension.
 */
class KurdishTextHelper
{
    /**
     * Arabic-script character => ASCII replacement used by transliterateToEnglish().
     *
     * Every key is a single code point and appears exactly once. Characters that are
     * not listed fall back to iconv transliteration.
     *
     * @var array<string, string>
     */
    private static $transliterationMap = [
        // Kurdish AE (U+06D5), the most frequent Sorani vowel
        "\u{06D5}" => 'e',
        // Arabic-Indic digits (U+0660 - U+0669)
        "\u{0660}" => '0', "\u{0661}" => '1', "\u{0662}" => '2', "\u{0663}" => '3', "\u{0664}" => '4',
        "\u{0665}" => '5', "\u{0666}" => '6', "\u{0667}" => '7', "\u{0668}" => '8', "\u{0669}" => '9',
        // Extended Arabic-Indic digits (U+06F0 - U+06F9)
        "\u{06F0}" => '0', "\u{06F1}" => '1', "\u{06F2}" => '2', "\u{06F3}" => '3', "\u{06F4}" => '4',
        "\u{06F5}" => '5', "\u{06F6}" => '6', "\u{06F7}" => '7', "\u{06F8}" => '8', "\u{06F9}" => '9',
        // Hamza carriers and alef forms
        'ء' => '', 'آ' => 'aa', 'أ' => 'a', 'ؤ' => 'u', 'إ' => 'i', 'ئ' => '', 'ا' => 'a',
        // Standard Arabic letters
        'ب' => 'b', 'ة' => 'h', 'ت' => 't', 'ث' => 'th', 'ج' => 'j', 'ح' => 'h', 'خ' => 'kh',
        'د' => 'd', 'ذ' => 'th', 'ر' => 'r', 'ز' => 'z', 'س' => 's', 'ش' => 'sh',
        'ص' => 's', 'ض' => 'd', 'ط' => 't', 'ظ' => 'z', 'ع' => 'a', 'غ' => 'gh',
        'ف' => 'f', 'ق' => 'q', 'ك' => 'k', 'ل' => 'l', 'م' => 'm', 'ن' => 'n',
        'ه' => 'h', 'و' => 'w', 'ى' => 'a', 'ي' => 'y',
        // Extended alef / hamza forms
        'ٱ' => 'a', 'ٲ' => 'a', 'ٳ' => 'a', 'ٵ' => 'aa', 'ٶ' => 'o', 'ٷ' => 'u', 'ٸ' => 'i',
        // Teh / beh / peh variants
        'ٹ' => 't', 'ٺ' => 't', 'ٻ' => 'b', 'ټ' => 't', 'ٽ' => 't', 'پ' => 'p', 'ٿ' => 't', 'ڀ' => 'b',
        // Hah / cheh variants
        'ځ' => 'h', 'ڂ' => 'h', 'ڃ' => 'h', 'ڄ' => 'h', 'څ' => 'h', 'چ' => 'ch', 'ڇ' => 'ch',
        // Dal variants
        'ڈ' => 'd', 'ډ' => 'd', 'ڊ' => 'd', 'ڋ' => 'd', 'ڌ' => 'd', 'ڍ' => 'd', 'ڎ' => 'd', 'ڏ' => 'd', 'ڐ' => 'd',
        // Reh variants (includes Kurdish R and Zhe)
        'ڑ' => 'r', 'ڒ' => 'r', 'ړ' => 'r', 'ڔ' => 'r', 'ڕ' => 'r', 'ږ' => 'r', 'ڗ' => 'r', 'ژ' => 'zh', 'ڙ' => 'r',
        // Seen / sad / tah / ain variants
        'ښ' => 's', 'ڛ' => 's', 'ڜ' => 's', 'ڝ' => 's', 'ڞ' => 's', 'ڟ' => 't', 'ڠ' => 'gh',
        // Feh / veh / qaf variants
        'ڡ' => 'f', 'ڢ' => 'f', 'ڣ' => 'f', 'ڤ' => 'v', 'ڥ' => 'f', 'ڦ' => 'p', 'ڧ' => 'q', 'ڨ' => 'q',
        // Kaf / gaf variants
        'ک' => 'k', 'ڪ' => 'k', 'ګ' => 'g', 'ڬ' => 'k', 'ڭ' => 'ng', 'ڮ' => 'k',
        'گ' => 'g', 'ڱ' => 'ng', 'ڲ' => 'g', 'ڳ' => 'g', 'ڴ' => 'g',
        // Lam / noon variants (includes Kurdish L)
        'ڵ' => 'l', 'ڶ' => 'l', 'ڷ' => 'l', 'ڸ' => 'l',
        'ڹ' => 'n', 'ں' => 'n', 'ڻ' => 'n', 'ڼ' => 'n', 'ڽ' => 'n',
        // Heh variants
        'ھ' => 'h', 'ہ' => 'h', 'ۂ' => 'h', 'ۃ' => 'h',
        // Waw variants
        'ۄ' => 'o', 'ۅ' => 'o', 'ۆ' => 'o', 'ۇ' => 'u', 'ۈ' => 'u', 'ۉ' => 'u', 'ۊ' => 'u', 'ۋ' => 'v', 'ۏ' => 'w',
        // Yeh variants
        'ی' => 'y', 'ۍ' => 'y', 'ێ' => 'e', 'ې' => 'e', 'ۑ' => 'y', 'ے' => 'y', 'ۓ' => 'y',
        // Small waw / small yeh marks carry no sound of their own
        'ۥ' => '', 'ۦ' => '',
    ];

    /**
     * Read a configuration value, falling back to the default when the
     * framework's global config() helper is not loaded (plain scripts, isolated tests).
     *
     * @param string $key     Dot-notation configuration key
     * @param mixed  $default Value returned when the key or the helper is missing
     * @return mixed
     */
    private static function configValue($key, $default)
    {
        return function_exists('config') ? config($key, $default) : $default;
    }

    /**
     * Get the corrupted-text => Kurdish-text lookup table from config,
     * or the single built-in mapping when none is configured.
     *
     * @return array<string, string>
     */
    private static function getMappings()
    {
        return self::configValue('kurdish_mappings.mappings', [
            '???? ???' => 'ئاسۆ کوی',
        ]);
    }

    /**
     * Convert $text from $encoding to UTF-8 without ever throwing.
     *
     * @param string $text
     * @param string $encoding Source encoding name
     * @return string|null Converted text, or null when the conversion is impossible
     */
    private static function convertToUtf8($text, $encoding)
    {
        try {
            $converted = mb_convert_encoding($text, 'UTF-8', $encoding);
        } catch (\Throwable $e) {
            // PHP 8 raises ValueError (an \Error, not an \Exception) for encodings
            // mbstring does not know; frameworks may also turn the PHP 7 warning
            // into an ErrorException. Either way this attempt simply failed.
            return null;
        }

        // PHP 7 signals an unknown encoding by returning false.
        return is_string($converted) ? $converted : null;
    }

    /**
     * Detect if text is corrupted, i.e. contains at least one '?' character.
     *
     * @param mixed $text
     * @return bool False for empty input
     */
    public static function isCorrupted($text)
    {
        if (empty($text)) {
            return false;
        }

        return strpos($text, '?') !== false;
    }

    /**
     * Check if text contains at least one Arabic-script character used to write Kurdish.
     *
     * Matches the basic Arabic letters (U+0621 - U+064A), both Arabic digit ranges and
     * the Kurdish/Persian additions, including U+0695, U+06A4, U+06BE, U+06CC and U+06CE.
     *
     * @param mixed $text
     * @return int|false 1 on a match, 0 on no match, false for empty input or a PCRE error
     */
    public static function containsKurdish($text)
    {
        if (empty($text)) {
            return false;
        }

        return preg_match(
            '/[\x{0621}-\x{064A}\x{0660}-\x{0669}\x{067E}\x{0686}\x{0695}\x{0698}\x{06A4}\x{06A9}\x{06AF}'
            . '\x{06B5}\x{06BE}\x{06C6}\x{06CC}\x{06CE}\x{06D5}\x{06F0}-\x{06F9}]/u',
            $text
        );
    }

    /**
     * Attempt to fix corrupted Kurdish text.
     *
     * Order of attempts: exact match in the configured mapping table, then re-decoding
     * from each configured encoding. Encodings that mbstring does not support are
     * skipped instead of aborting the whole call.
     *
     * @param mixed $text
     * @return mixed The input unchanged when it is empty or not corrupted; the repaired
     *               string on success; otherwise '' / the original / null depending on
     *               the 'kurdish_mappings.unfixable_return' setting (default: null)
     */
    public static function fixCorruptedText($text)
    {
        if (empty($text) || !self::isCorrupted($text)) {
            return $text;
        }

        // Known mappings take priority over guesswork.
        $mappings = self::getMappings();
        if (isset($mappings[$text])) {
            return $mappings[$text];
        }

        $encodings = (array) self::configValue(
            'kurdish_mappings.try_encodings',
            ['Windows-1256', 'ISO-8859-6', 'CP1256']
        );

        // NOTE(review): '?' is plain ASCII in all three default encodings, so re-decoding
        // leaves it in place and these attempts cannot succeed; only a configured
        // multi-byte encoding could. Confirm whether this loop is still needed.
        foreach ($encodings as $encoding) {
            $converted = self::convertToUtf8($text, $encoding);

            if ($converted !== null && $converted !== $text && !self::isCorrupted($converted)) {
                return $converted;
            }
        }

        // Nothing worked: answer according to the configured policy.
        switch (self::configValue('kurdish_mappings.unfixable_return', 'null')) {
            case 'empty':
                return '';
            case 'original':
                return $text;
            case 'null':
            default:
                return null; // null signals unfixable corruption
        }
    }

    /**
     * Ensure text is valid UTF-8.
     *
     * Valid input is returned untouched. Otherwise the source encoding is detected
     * strictly against the current mbstring detect order and converted; when nothing
     * matches, invalid byte sequences are replaced with mbstring's substitute
     * character, so a non-empty string input always yields a string.
     *
     * @param mixed $text
     * @return mixed The input unchanged when empty or already valid, else a UTF-8 string
     */
    public static function ensureUtf8($text)
    {
        if (empty($text) || mb_check_encoding($text, 'UTF-8')) {
            return $text;
        }

        // Strict detection avoids the warning and false return that
        // mb_convert_encoding(..., 'auto') produces for undetectable input.
        $detected = mb_detect_encoding($text, mb_detect_order(), true);
        if ($detected !== false) {
            $converted = self::convertToUtf8($text, $detected);
            if ($converted !== null) {
                return $converted;
            }
        }

        // Last resort: keep every valid sequence, substitute the invalid bytes.
        return mb_convert_encoding($text, 'UTF-8', 'UTF-8');
    }

    /**
     * Process text: fix corruption, then ensure UTF-8.
     *
     * @param mixed $text
     * @return mixed The input unchanged when empty; otherwise the repaired text, or the
     *               original text when the corruption is reported as unfixable (null)
     */
    public static function process($text)
    {
        if (empty($text)) {
            return $text;
        }

        $fixed = self::fixCorruptedText($text);

        // null means "unfixable": keep what we were given.
        if ($fixed === null) {
            $fixed = $text;
        }

        return self::ensureUtf8($fixed);
    }

    /**
     * Transliterate Arabic/Kurdish characters to the English alphabet.
     * Used for generating user names and emails from Unicode names.
     *
     * The result is lowercase and limited to a-z, 0-9, space, '.', '-' and '_';
     * whitespace runs collapse to one space. When nothing usable remains the
     * fallback 'customer' is returned.
     *
     * @param mixed $text
     * @return mixed The input unchanged when empty, otherwise the ASCII string
     */
    public static function transliterateToEnglish($text)
    {
        if (empty($text)) {
            return $text;
        }

        // Guarantee valid UTF-8 so the text can be split into whole code points.
        $text = (string) self::ensureUtf8($text);

        // One linear pass; repeated mb_substr() calls would rescan the string each time.
        $characters = preg_split('//u', $text, -1, PREG_SPLIT_NO_EMPTY);
        if ($characters === false) {
            $characters = [];
        }

        $canUseIconv = function_exists('iconv');
        $transliterated = '';

        foreach ($characters as $char) {
            if (isset(self::$transliterationMap[$char])) {
                // The manual mapping is more reliable than iconv on Windows.
                $transliterated .= self::$transliterationMap[$char];
                continue;
            }

            if (strlen($char) === 1) {
                // A single byte of valid UTF-8 is ASCII: keep it, the cleanup below
                // decides what survives.
                $transliterated .= $char;
                continue;
            }

            if (!$canUseIconv) {
                continue;
            }

            // Best effort for unmapped characters. '@' is deliberate: iconv raises a
            // notice for characters it cannot convert and we just skip those.
            $iconvResult = @iconv('UTF-8', 'ASCII//TRANSLIT//IGNORE', $char);
            if ($iconvResult !== false && $iconvResult !== '') {
                $transliterated .= $iconvResult;
            }
        }

        // Keep only alphanumerics, whitespace, dots, hyphens and underscores.
        $result = preg_replace('/[^a-z0-9\s\-_\.]/i', '', $transliterated);

        // Collapse every whitespace run (tabs and newlines included) to one space.
        $result = preg_replace('/\s+/', ' ', $result);

        $result = mb_strtolower(trim($result), 'UTF-8');

        // Compare against '' rather than using empty(), which would also reject "0".
        if (trim($result, ' .-') === '') {
            $result = 'customer';
        }

        return $result;
    }

}

