#!/usr/bin/python2
import os.path, tempfile

def GetRecoder(unicode=False, charset=''):
    '''Return the shared Recoder instance, creating it on the first call.

    Recoder's constructor raises the already existing instance instead of
    building a second one, so that instance is caught here and handed
    back.  ``unicode`` and ``charset`` therefore only take effect on the
    call that actually creates the instance.
    '''
    try:
        return Recoder(unicode, charset)
    except Recoder as existing:
        # The constructor signalled "already created" by raising the
        # existing instance.
        return existing

def detect_charset():
    '''Return the name of the character set used by the current locale.

    The process locale is first switched to the one configured in the
    environment (a process-wide side effect).  If the environment names
    a locale that the system does not support, the locale is left as it
    was and the codeset of the currently active locale is reported.

    On platforms whose ``locale`` module provides no ``nl_langinfo`` /
    ``CODESET`` the fixed fallback "Windows-1251" is returned.
    '''
    import locale

    try:
        locale.setlocale(locale.LC_ALL, '')
    except locale.Error:
        # Unsupported locale requested via LANG/LC_*: keep the active
        # locale rather than failing charset detection altogether.
        pass

    try:
        return locale.nl_langinfo(locale.CODESET)
    except AttributeError:
        return "Windows-1251"

class Recoder:
    '''Detect Cyrillic encodings and recode data between charsets.

    Only one instance may exist: constructing a second one raises the
    first instance, which GetRecoder() catches and returns.

    NOTE: this has to stay an old-style class (no ``object`` base).
    Python 2 refuses to raise instances of new-style classes that do not
    derive from BaseException, which would break the singleton protocol.
    '''
    __recoder = None

    def __init__(self, unicode=False, charset=''):
        '''Create the singleton instance.

        unicode -- if true, decode() produces unicode objects and
                   encode()/str() expect them; otherwise byte strings in
                   the system charset are used.
        charset -- system charset name; when empty or unknown to Python
                   the charset is taken from detect_charset().

        Raises the existing instance if one was already created.
        '''
        if Recoder.__recoder is not None:
            raise Recoder.__recoder

        self.table_names = ["KOI8-R","Windows-1251","CP866","ISO_8859-5"]
        # One table per entry of table_names: maps a byte of that
        # encoding to the KOI8-R byte used as key of self.freq.
        self.tables = [
            # koi
            {0xc1: 0xc1, 0xc0: 0xc0, 0xc3: 0xc3, 0xc2: 0xc2,
             0xc5: 0xc5, 0xc4: 0xc4, 0xc7: 0xc7, 0xc6: 0xc6,
             0xc9: 0xc9, 0xc8: 0xc8, 0xcb: 0xcb, 0xca: 0xca,
             0xcd: 0xcd, 0xcc: 0xcc, 0xcf: 0xcf, 0xce: 0xce,
             0xd1: 0xd1, 0xd0: 0xd0, 0xd3: 0xd3, 0xd2: 0xd2,
             0xd5: 0xd5, 0xd4: 0xd4, 0xd7: 0xd7, 0xd6: 0xd6,
             0xd9: 0xd9, 0xd8: 0xd8, 0xdb: 0xdb, 0xda: 0xda,
             0xdd: 0xdd, 0xdc: 0xdc, 0xdf: 0xdf, 0xde: 0xde},
            # win
            {0xe1: 0xc2, 0xe0: 0xc1, 0xe3: 0xc7, 0xe2: 0xd7,
             0xe5: 0xc5, 0xe4: 0xc4, 0xe7: 0xda, 0xe6: 0xd6,
             0xe9: 0xca, 0xe8: 0xc9, 0xeb: 0xcc, 0xea: 0xcb,
             0xed: 0xce, 0xec: 0xcd, 0xef: 0xd0, 0xee: 0xcf,
             0xf1: 0xd3, 0xf0: 0xd2, 0xf3: 0xd5, 0xf2: 0xd4,
             0xf5: 0xc8, 0xf4: 0xc6, 0xf7: 0xde, 0xf6: 0xc3,
             0xf9: 0xdd, 0xf8: 0xdb, 0xfb: 0xd9, 0xfa: 0xdf,
             0xfd: 0xdc, 0xfc: 0xd8, 0xff: 0xd1, 0xfe: 0xc0},
            # alt (cp 866)
            {0xa6: 0xd6, 0xe1: 0xd3, 0xea: 0xdf, 0xe0: 0xd2,
             0xed: 0xdc, 0xe3: 0xd5, 0xec: 0xd8, 0xe7: 0xde,
             0xa2: 0xd7, 0xef: 0xd1, 0xe5: 0xc8, 0xae: 0xcf,
             0xa4: 0xc4, 0xa1: 0xc2, 0xa0: 0xc1, 0xa3: 0xc7,
             0xe2: 0xd4, 0xa5: 0xc5, 0xe4: 0xc6, 0xa7: 0xda,
             0xe6: 0xc3, 0xa9: 0xca, 0xa8: 0xc9, 0xab: 0xcc,
             0xaa: 0xcb, 0xad: 0xce, 0xac: 0xcd, 0xaf: 0xd0,
             0xee: 0xc0, 0xe9: 0xdd, 0xe8: 0xdb, 0xeb: 0xd9},
            # iso
            {0xd1: 0xc2, 0xd0: 0xc1, 0xd3: 0xc7, 0xd2: 0xd7,
             0xd5: 0xc5, 0xd4: 0xc4, 0xd7: 0xda, 0xd6: 0xd6,
             0xd9: 0xca, 0xd8: 0xc9, 0xdb: 0xcc, 0xda: 0xcb,
             0xdd: 0xce, 0xdc: 0xcd, 0xdf: 0xd0, 0xde: 0xcf,
             0xe1: 0xd3, 0xe0: 0xd2, 0xe3: 0xd5, 0xe2: 0xd4,
             0xe5: 0xc8, 0xe4: 0xc6, 0xe7: 0xde, 0xe6: 0xc3,
             0xe9: 0xdd, 0xe8: 0xdb, 0xeb: 0xd9, 0xea: 0xdf,
             0xed: 0xdc, 0xec: 0xd8, 0xef: 0xd1, 0xee: 0xc0}
            ]
        # Weight of each KOI8-R byte; detect() sums these per table.
        self.freq = {0xc1: 806, 0xc0: 51 , 0xc3: 43  , 0xc2: 152,
                0xc5: 818, 0xc4: 282, 0xc7: 199 , 0xc6: 6  ,
                0xc9: 656, 0xc8: 92 , 0xcb: 341 , 0xca: 113,
                0xcd: 305, 0xcc: 550, 0xcf: 1083, 0xce: 630,
                0xd1: 192, 0xd0: 260, 0xd3: 524 , 0xd2: 433,
                0xd5: 292, 0xd4: 538, 0xd7: 449 , 0xd6: 94 ,
                0xd9: 184, 0xd8: 162, 0xdb: 108 , 0xda: 179,
                0xdd: 45 , 0xdc: 17 , 0xdf: 1   , 0xde: 129}

        self.unicode = unicode
        self.charsets_list = {}
        if not (charset and self.set_charset(charset)):
            self.charset = detect_charset()

        # Register the singleton only once it is fully initialised, so a
        # failure above cannot leave a half-built instance registered.
        Recoder.__recoder = self

    def detect(self, data):
        '''Guess the encoding of the string ``data``.

        Every character is looked up in each encoding table and the
        letter weights from self.freq are summed; the name of the table
        with the highest total is returned.  On a tie (including empty
        data) the earliest entry of table_names wins.
        '''
        freq = self.freq
        rating = [
            # Characters that are not in the table simply score 0.
            sum(freq.get(table.get(ord(ch)), 0) for ch in data)
            for table in self.tables
        ]
        # list.index() returns the first position of the maximum.
        return self.table_names[rating.index(max(rating))]

    def decode(self, data, cs_from=''):
        '''Recode ``data`` from ``cs_from`` to the system encoding.

        If unicode mode is enabled the result is a unicode object
        instead.  An empty ``cs_from`` means the system charset, which
        is useful for recoding from the system charset to unicode.  If
        a conversion step fails, the data is left as it was before that
        step.
        '''
        if not cs_from:
            cs_from = self.charset

        if not self.unicode and cs_from == self.charset:
            return data

        try:
            data = unicode(data, cs_from)
        except Exception:
            # Deliberately best effort: undecodable input, an unknown
            # charset or data that is already unicode is kept unchanged.
            pass

        if not self.unicode:
            try:
                data = data.encode(self.charset, 'ignore')
            except Exception:
                pass
        return data

    def encode(self, data, cs_to=''):
        '''Recode ``data`` from the system encoding to ``cs_to``.

        In unicode mode ``data`` is expected to be a unicode object.  An
        empty ``cs_to`` means the system charset, which is useful for
        recoding from unicode to the system charset.  Without unicode
        mode, data that is already in the target charset is returned
        untouched.  If a conversion step fails, the data is left as it
        was before that step.
        '''
        if not cs_to:
            cs_to = self.charset

        # Must test the instance flag: the bare name ``unicode`` is the
        # builtin type, which is always true.
        if not self.unicode and cs_to == self.charset:
            return data

        if not self.unicode:
            try:
                data = unicode(data, self.charset)
            except Exception:
                # Best effort, see decode().
                pass
        try:
            data = data.encode(cs_to, 'ignore')
        except Exception:
            pass

        return data

    def recode_file(self, from_file, from_encoding='', to_file='', to_encoding=''):
        '''Recode the file ``from_file`` and return the result's path.

        The content is converted from ``from_encoding`` to
        ``to_encoding`` (characters the target cannot represent are
        dropped) and written to ``to_file``.  An empty encoding means
        the system charset.  If ``to_file`` is empty, a new temporary
        file with the suffix ".$$$" is created next to ``from_file``
        and its absolute path is returned.

        If both encodings are equal nothing is read or written and
        ``from_file`` itself is returned.

        Raises IOError/OSError on file errors and UnicodeDecodeError or
        LookupError when the source cannot be decoded.
        '''
        if not from_encoding:
            from_encoding = self.charset
        if not to_encoding:
            to_encoding = self.charset

        if to_encoding == from_encoding:
            return from_file

        # Binary mode: the content is raw bytes in some 8-bit charset
        # and must not go through newline translation.
        with open(from_file, "rb") as src:
            data = src.read()
        # Convert before creating any output so that a decoding error
        # does not leave an empty file behind.
        data = data.decode(from_encoding).encode(to_encoding, 'ignore')

        if to_file:
            dst = open(to_file, "wb")
        else:
            # mkstemp creates the file atomically (unlike mktemp) and
            # takes the directory directly, so the global
            # tempfile.tempdir is not modified.
            target_dir = os.path.dirname(from_file) or os.curdir
            fd, to_file = tempfile.mkstemp(".$$$", dir=target_dir)
            dst = os.fdopen(fd, "wb")
        with dst:
            dst.write(data)

        return to_file

    def str(self, data):
        '''Convert ``data`` to the string type of the current mode.

        Unicode objects are returned unchanged.  Anything else is passed
        through the builtin str(); in unicode mode that result is then
        decoded from the system charset.
        '''
        if isinstance(data, type(u'')):
            return data

        if not self.unicode:
            return str(data)

        return self.decode(str(data))

    def check_charset(self, value):
        '''Return True if ``value`` names an encoding known to Python,
        False if the codec lookup fails.'''
        try:
            b'a'.decode(value)
        except LookupError:
            return False
        return True

    def set_charset(self, charset):
        '''Set the system charset.

        Returns True on success.  If the charset is invalid the system
        charset is not changed and False is returned.
        '''
        if not self.check_charset(charset):
            return False
        self.charset = charset
        return True

    def get_charsets_list(self):
        '''Return the mapping of display names to charset names.

        While the mapping is empty it is first filled with the
        detectable encodings (table_names), each mapped to itself.
        '''
        if not self.charsets_list:
            for item in self.table_names:
                self.charsets_list[item] = item

        return self.charsets_list

    def set_charsets_list(self, lst):
        '''Replace the mapping of display names to charset names.'''
        self.charsets_list = lst

    def add_charset(self, name, charset):
        '''Add ``charset`` to the list under the display name ``name``.

        Returns False, changing nothing, if either argument is empty or
        the charset is unknown to Python; True otherwise.
        '''
        if not (name and charset):
            return False

        if not self.check_charset(charset):
            return False

        self.charsets_list[name] = charset
        return True

    def del_charset(self, name):
        '''Remove ``name`` from the list of known charsets; names that
        are not in the list are ignored.'''
        self.charsets_list.pop(name, None)
    
#####################################################

if __name__ == "__main__":
    # Small manual smoke test for the charset list handling.  To try
    # detection on real data instead, read a file, pass its first couple
    # of kilobytes to code.detect() and feed the result to code.decode().
    code = GetRecoder()

    # "koix-r" is not a valid encoding, so that call is expected to
    # print False; all the others should print True.
    attempts = (
        ("aaaaa", "koi8-r"),
        ("zzzaa", "koi8-r"),
        ("axxca", "koi8-r"),
        ("aafaa", "koi8-r"),
        ("zzzaa", "koix-r"),
        ("z22", "koi8-r"),
    )
    for label, encoding in attempts:
        print(code.add_charset(label, encoding))

    print(code.get_charsets_list())
    code.del_charset("z22")
    print(code.get_charsets_list())
