turbogears.i18n.pygettext.catalog

1 import sys 2 import os 3 import codecs 4 import pygettext 5 6 MESSAGES = [] 7 8

def detect_unicode_encoding(bytes):
    """Guess the encoding of a byte string from its byte order mark.

    Returns a ``(codec_name, bom_length)`` pair.  When ``bytes`` does not
    start with any of the known marks, ``('UTF-8', 0)`` is returned.
    """
    # The four-byte UTF-32 marks are tried before the two-byte UTF-16 ones
    # because the UTF-32LE mark itself begins with the UTF-16LE mark.
    candidates = (
        (codecs.BOM_UTF8, 'UTF-8'),
        (codecs.BOM_UTF32_LE, 'UTF-32LE'),
        (codecs.BOM_UTF32_BE, 'UTF-32BE'),
        (codecs.BOM_UTF16_LE, 'UTF-16LE'),
        (codecs.BOM_UTF16_BE, 'UTF-16BE'),
    )
    for mark, codec_name in candidates:
        mark_length = len(mark)
        if bytes[:mark_length] == mark:
            return codec_name, mark_length
    return 'UTF-8', 0

22 23

class ParseError(ValueError):
    """Error raised when a .po file cannot be read."""

26 27

def merge(master_file, language_files):
    """Merge the master catalog into each language catalog.

    ``master_file`` is parsed once; the parsed result is then passed to
    ``merging`` together with every path in ``language_files``, in order.
    """
    master_messages = parse(master_file)
    for language_path in language_files:
        merging(master_messages, language_path)

32 33

def merging(parsed_master_file, path):
    """Rewrite the catalog at ``path`` so it lists the master's entries.

    Every entry of ``parsed_master_file`` receives the translation that the
    catalog at ``path`` currently holds for the same id, or ``None`` when
    there is none.  The entries are modified in place and then written back
    to ``path`` with ``save``.
    """
    # ignore empty messages, as msgfmt does
    translations = dict(
        (entry['id'], entry['message'])
        for entry in parse(path)
        if entry['message']
    )

    merged = []
    for entry in parsed_master_file:
        entry['message'] = translations.get(entry['id'])
        merged.append(entry)

    save(path, merged)

48 49

def items(path, sort_by, dir):
    """Return the entries of the catalog at ``path`` minus the first one.

    The first parsed entry is dropped.  When ``sort_by`` is truthy the
    remaining entries are returned through ``sort(entries, sort_by, dir)``;
    otherwise they are returned in file order.
    """
    entries = parse(path)[1:]
    if not sort_by:
        return entries
    return sort(entries, sort_by, dir)

57 58

def sort(po, sort_by, dir):
    """Return the entries of ``po`` ordered by one of their columns.

    ``sort_by`` selects the column: ``'id'``, ``'string'`` and ``'context'``
    map to the entry keys ``'id'``, ``'message'`` and ``'path'``.  Any other
    value raises ``KeyError`` as soon as an entry is looked at.

    Entries are grouped by the selected value and the groups are emitted in
    ascending order of that value, or in descending order when ``dir`` is
    ``'up'``.  Entries sharing a value keep their relative order from ``po``.
    A new list is returned; ``po`` itself is not modified.
    """
    col_map = {'id': 'id', 'string': 'message', 'context': 'path'}
    groups = {}
    for message in po:
        groups.setdefault(message[col_map[sort_by]], []).append(message)

    # sorted() accepts any iterable of keys, so this does not depend on
    # dict.keys() returning a list that can be sorted in place.
    ordered = []
    for key in sorted(groups, reverse=(dir == 'up')):
        ordered.extend(groups[key])
    return ordered

75 76

def save(path, message_list):
    """Write ``message_list`` to ``path`` as a .po file, keeping a backup.

    The ``'message'`` of the first entry is written verbatim as the file
    header.  Every following entry is written as a ``#:`` context line, a
    ``msgid`` line and a ``msgstr`` line; a missing (falsy) message is
    written as an empty string.

    The existing file at ``path`` is renamed to a sibling whose extension
    is replaced by ``.back`` (``messages.po`` becomes ``messages.back``);
    a previous backup of that name is removed first.  The new content is
    then written to ``path`` encoded as UTF-8.

    Raises ``IndexError`` if ``message_list`` is empty and ``OSError`` if
    ``path`` cannot be renamed.
    """
    chunks = [message_list[0]['message'], u'\n\n']
    for entry in message_list[1:]:
        # No newline is appended after the context: it is written exactly
        # as stored (presumably it still carries the newline of the '#: '
        # line it was read from -- verify against parse()).
        chunks.append(u'#: %s' % entry['context'])
        chunks.append(u'msgid %s\n' % normalize(entry['id']))
        chunks.append(u'msgstr %s\n\n' % normalize(entry['message'] or ''))
    txt = u''.join(chunks)

    # Only the extension is swapped.  A plain str.replace('.po', '.back')
    # would also rewrite '.po' inside directory names and, for a path that
    # has no '.po' at all, would make the backup name equal to the path.
    backup_name = os.path.splitext(path)[0] + '.back'

    try:
        os.remove(backup_name)
    except os.error:
        # Best effort: there may simply be no earlier backup to remove.
        pass

    os.rename(path, backup_name)

    out = codecs.open(path, 'wb', 'utf-8')
    try:
        out.write(txt)
    finally:
        # Close explicitly so the data is flushed and the handle released.
        out.close()

102 103

def update(path, msg_id, msg_text):
    """Set the translation of ``msg_id`` in the catalog at ``path``.

    The catalog is parsed and every entry after the first whose id equals
    ``msg_id`` (both compared with surrounding whitespace stripped) gets
    ``msg_text`` as its message.  If at least one entry matched, the
    catalog is written back once with ``save``; otherwise the file is left
    untouched.
    """
    message_list = parse(path)
    wanted = msg_id.strip()

    changed = False
    for entry in message_list[1:]:
        if entry['id'].strip() == wanted:
            entry['message'] = msg_text
            changed = True

    # Save a single time after all matches are applied, so the backup that
    # save() creates is the file as it was before this update.
    if changed:
        save(path, message_list)

110 111

def quote(msg):
    """Return ``msg`` escaped by ``pygettext.escape_unicode``."""
    escaped = pygettext.escape_unicode(msg)
    return escaped

114 115

def normalize(s):
    """Format ``s`` as a double-quoted string for a msgid/msgstr line.

    A string without newlines becomes one quoted, escaped literal.  A
    string containing newlines becomes an empty ``""`` literal followed by
    one quoted literal per line of ``s``.
    """
    # taken from pygettext module but changed a bit
    pieces = s.split('\n')
    if len(pieces) == 1:
        return '"' + quote(s) + '"'

    if not pieces[-1]:
        # s ended with a newline: drop the empty trailing piece and put
        # the newline back on the last real line so quote() sees it.
        pieces.pop()
        pieces[-1] = pieces[-1] + '\n'

    escaped = [quote(piece) for piece in pieces]
    return '""\n"' + '\\n"\n"'.join(escaped) + '"'

134 135

def add(id, str, context, fuzzy, MESSAGES):
    """Append a non-fuzzy translation to the ``MESSAGES`` list.

    Nothing is appended when ``fuzzy`` is truthy.  Otherwise a dict with
    the keys ``id``, ``message``, ``path``, ``context``, ``file`` and
    ``line`` is appended.  ``context`` is split on ``':'``: the first piece
    is stored as ``path`` (and its base name as ``file``), the last piece,
    with newlines removed, as ``line``.
    """
    if fuzzy:
        return

    pieces = context.split(':')
    source_path = pieces[0]
    entry = {
        'id': id,
        'message': str,
        'path': source_path,
        'context': context,
        'file': os.path.basename(source_path),
        'line': pieces[-1].replace('\n', ''),  # remove the \n
    }
    MESSAGES.append(entry)

153 154

def parse(infile):
    """Parse the .po catalog at ``infile`` into a list of message dicts.

    The file is decoded line by line using the encoding reported by
    ``detect_unicode_encoding`` for its first four bytes.  Each completed
    ``msgid``/``msgstr`` pair is passed to ``add`` together with the text
    of the most recent ``#: `` line seen before its ``msgid``; ``add``
    skips entries flagged fuzzy.

    Leading lines are collected as the header until the first ``#: `` line,
    or until a ``msgid "`` line that follows a header line containing
    ``Generated-By:``.  The joined header replaces the ``'message'`` of the
    first returned entry.

    Returns the list of entries.  Raises ``ParseError`` when a quoted
    string cannot be evaluated or decoded, or when a string appears outside
    a msgid/msgstr section, and ``IndexError`` when the file yields no
    entry at all.
    """
    MESSAGES = list()
    ID = 1
    STR = 2
    header = list()

    # NOTE(review): lines are split on raw '\n' bytes before decoding,
    # which looks unsafe for UTF-16/UTF-32 input -- confirm whether such
    # catalogs need to be supported.
    fd = open(infile, 'rt')
    try:
        encoding, offset = detect_unicode_encoding(fd.read(4))
        fd.seek(offset)
        lines = [line.decode(encoding) for line in fd.readlines()]
    finally:
        fd.close()

    section = None
    fuzzy = 0
    # Defined up front so a catalog that starts with a stray msgstr or
    # comment cannot hit an unbound local.
    msgid = msgstr = ''

    # Parse the catalog
    lno = 0
    context = ''
    prev_context = ''
    heading = True
    for l in lines:
        if not l:
            continue

        lno += 1
        if heading:
            if l.startswith('#: '):
                heading = False

            if l.startswith('msgid "') and header and \
                    'Generated-By:' in header[-1]:
                heading = False

            if l.strip():
                if heading:
                    header.append(l)

        # If we get a comment line after a msgstr, this is a new entry
        if l[0] == '#' and section == STR:
            add(msgid, msgstr, prev_context, fuzzy, MESSAGES)
            section = None
            fuzzy = 0

        # Record a fuzzy mark.  A membership test is required here:
        # str.find() returns -1 (truthy) when the word is absent, which
        # would flag every '#,' line (e.g. '#, python-format') as fuzzy.
        if l[:2] == '#,' and 'fuzzy' in l:
            fuzzy = 1

        if l.startswith('#: '):
            context = l[len('#: '):]

        # Skip comments
        if l[0] == '#':
            continue

        # Now we are in a msgid section, output previous section
        if l.startswith('msgid'):
            if section == STR:
                add(msgid, msgstr, prev_context, fuzzy, MESSAGES)

            section = ID
            prev_context = context
            l = l[5:]
            msgid = msgstr = ''

        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            section = STR
            l = l[6:]

        # Skip empty lines
        l = l.strip()
        if not l:
            continue

        # XXX: Does this always follow Python escape semantics?
        # NOTE(review): eval() runs whatever expression the catalog
        # contains, so this is only safe for trusted .po files.
        try:
            l = eval(l)
        except Exception:
            e = sys.exc_info()[1]
            sys.stderr.write('Escape error on %s: %d before: %s\n'
                             % (infile, lno, repr(l)))
            raise ParseError(e)

        try:
            l = l.decode('utf8')
        except UnicodeDecodeError:
            # Bind the active exception explicitly; the clause itself does
            # not name it.
            e = sys.exc_info()[1]
            sys.stderr.write('Encoding error on %s: %d before: %s\n'
                             % (infile, lno, repr(l)))
            raise ParseError(e)

        if section == ID:
            msgid += l

        elif section == STR:
            msgstr += l

        else:
            # A string outside any msgid/msgstr section.  There is no
            # underlying exception here, so describe the problem directly.
            sys.stderr.write('Syntax error on %s:%d before:\n'
                             % (infile, lno))
            sys.stderr.write('%s\n' % (l,))
            raise ParseError('Syntax error on %s:%d' % (infile, lno))

    # Add last entry
    if section == STR:
        add(msgid, msgstr, prev_context, fuzzy, MESSAGES)

    MESSAGES[0]['message'] = u''.join(header)
    return MESSAGES

260

Source Code for Module turbogears.i18n.pygettext.catalog