1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 import re
18 try:
19 import fintl
20 _ = fintl.gettext
21 except ImportError:
22 _ = lambda s: s
23
24 __doc__ = _("""pygettext -- Python equivalent of xgettext(1)
25
26 Many systems (Solaris, Linux, Gnu) provide extensive tools that ease the
27 internationalization of C programs. Most of these tools are independent of
28 the programming language and can be used from within Python programs.
29 Martin von Loewis' work[1] helps considerably in this regard.
30
31 There's one problem though; xgettext is the program that scans source code
32 looking for message strings, but it groks only C (or C++). Python
33 introduces a few wrinkles, such as dual quoting characters, triple quoted
34 strings, and raw strings. xgettext understands none of this.
35
36 Enter pygettext, which uses Python's standard tokenize module to scan
37 Python source code, generating .pot files identical to what GNU xgettext[2]
38 generates for C and C++ code. From there, the standard GNU tools can be
39 used.
40
41 A word about marking Python strings as candidates for translation. GNU
42 xgettext recognizes the following keywords: gettext, dgettext, dcgettext,
43 and gettext_noop. But those can be a lot of text to include all over your
44 code. C and C++ have a trick: they use the C preprocessor. Most
45 internationalized C source includes a #define for gettext() to _() so that
46 what has to be written in the source is much less. Thus these are both
47 translatable strings:
48
49 gettext("Translatable String")
50 _("Translatable String")
51
52 Python of course has no preprocessor so this doesn't work so well. Thus,
53 pygettext searches only for _() by default, but see the -k/--keyword flag
54 below for how to augment this.
55
56 [1] http://www.python.org/workshops/1997-10/proceedings/loewis.html
57 [2] http://www.gnu.org/software/gettext/gettext.html
58
59 NOTE: pygettext attempts to be option and feature compatible with GNU
60 xgettext where ever possible. However some options are still missing or are
61 not fully implemented. Also, xgettext's use of command line switches with
62 option arguments is broken, and in these cases, pygettext just defines
63 additional switches.
64
65 Usage: pygettext [options] inputfile ...
66
67 Options:
68
69 -a
70 --extract-all
71 Extract all strings.
72
73 -d name
74 --default-domain=name
75 Rename the default output file from messages.pot to name.pot.
76
77 -E
78 --escape
79 Replace non-ASCII characters with octal escape sequences.
80
81 -D
82 --docstrings
83 Extract module, class, method, and function docstrings. These do
84 not need to be wrapped in _() markers, and in fact cannot be for
85 Python to consider them docstrings. (See also the -X option).
86
87 -h
88 --help
89 Print this help message and exit.
90
91 -k word
92 --keyword=word
93 Keywords to look for in addition to the default set, which are:
94 %(DEFAULTKEYWORDS)s
95
96 You can have multiple -k flags on the command line.
97
98 -K
99 --no-default-keywords
100 Disable the default set of keywords (see above). Any keywords
101 explicitly added with the -k/--keyword option are still recognized.
102
103 --no-location
104 Do not write filename/lineno location comments.
105
106 -n
107 --add-location
108 Write filename/lineno location comments indicating where each
109 extracted string is found in the source. These lines appear before
110 each msgid. The style of comments is controlled by the -S/--style
111 option. This is the default.
112
113 -o filename
114 --output=filename
115 Rename the default output file from messages.pot to filename. If
116 filename is `-' then the output is sent to standard out.
117
118 -p dir
119 --output-dir=dir
120 Output files will be placed in directory dir.
121
122 -S stylename
123 --style stylename
124 Specify which style to use for location comments. Two styles are
125 supported:
126
127 Solaris # File: filename, line: line-number
128 GNU #: filename:line
129
130 The style name is case insensitive. GNU style is the default.
131
132 -v
133 --verbose
134 Print the names of the files being processed.
135
136 -V
137 --version
138 Print the version of pygettext and exit.
139
140 -w columns
141 --width=columns
142 Set width of output to columns.
143
144 -x filename
145 --exclude-file=filename
146     Specify a file that contains a list of strings that are not to be
147 extracted from the input files. Each string to be excluded must
148 appear on a line by itself in the file.
149
150 -X filename
151 --no-docstrings=filename
152 Specify a file that contains a list of files (one per line) that
153 should not have their docstrings extracted. This is only useful in
154 conjunction with the -D option above.
155
156 If `inputfile' is -, standard input is read.
157 """)
158
159 import os
160 import imp
161 import sys
162 import glob
163 import time
164 import getopt
165 import token
166 import tokenize
167 import operator
168
169 try:
170 import kid.parser as kid_parser
171 except ImportError:
172 kid_parser = None
173
174 try:
175 from genshi.template import MarkupTemplate as GenshiMarkupTemplate
176 from genshi.filters.i18n import Translator as GenshiTranslator
177 except ImportError:
178 GenshiMarkupTemplate = None
179
__version__ = '1.5'

# Keywords searched for by default; -k/--keyword adds to this set and
# -K/--no-default-keywords clears it (see main()).
default_keywords = ['_']
DEFAULTKEYWORDS = ', '.join(default_keywords)

# Used with str.join() to concatenate adjacent string tokens.
EMPTYSTRING = ''
186
187
188
# Header template for the generated .pot file; %-interpolated with
# 'time', 'version' and 'charset' in TokenEater.write().
# NOTE(review): Content-Transfer-Encoding is filled with the charset name
# rather than '8bit'; this mirrors old upstream pygettext behavior —
# confirm before changing, as downstream tooling may expect it.
pot_header = _('''\
# SOME DESCRIPTIVE TITLE.
# Copyright (C) YEAR ORGANIZATION
# FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.
#
msgid ""
msgstr ""
"Project-Id-Version: PACKAGE VERSION\\n"
"POT-Creation-Date: %(time)s\\n"
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\\n"
"Language-Team: LANGUAGE <LL@li.org>\\n"
"MIME-Version: 1.0\\n"
"Content-Type: text/plain; charset=%(charset)s\\n"
"Content-Transfer-Encoding: %(charset)s\\n"
"Generated-By: pygettext.py %(version)s\\n"

''')
207
209 print >> sys.stderr, __doc__ % globals()
210 if msg:
211 print >> sys.stderr, msg
212 sys.exit(code)
213
214
# Table mapping each byte value 0-255 to its PO-file representation;
# populated by make_escapes() before any extraction happens.
escapes = []

def make_escapes(pass_iso8859):
    """Populate the global `escapes` table.

    Printable ASCII characters map to themselves; every other byte maps
    to a three-digit octal escape.  If `pass_iso8859` is true, bytes with
    the high bit set are treated like their low-7-bit counterparts, so
    latin-1 accented characters pass through unescaped.
    """
    global escapes
    if pass_iso8859:
        # Allow iso-8859 characters through: a byte >= 128 is escaped only
        # if its low 7 bits fall outside the printable 32..126 range.
        mod = 128
    else:
        mod = 256
    for i in range(256):
        if 32 <= (i % mod) <= 126:
            escapes.append(chr(i))
        else:
            escapes.append("\\%03o" % i)
    # The conventional short escapes override the generic octal form.
    escapes[ord('\\')] = '\\\\'
    escapes[ord('\t')] = '\\t'
    escapes[ord('\r')] = '\\r'
    escapes[ord('\n')] = '\\n'
    escapes[ord('\"')] = '\\"'
236
237
def escape(s):
    """Escape all non-ascii text plus control chars and Python literals.

    Each character of `s` is replaced via the global `escapes` table, so
    make_escapes() must have been called first.
    """
    return EMPTYSTRING.join([escapes[ord(c)] for c in s])
244
245
def escape_unicode(s):
    """Escape control chars and Python literals, leave non-ascii text intact."""
    s = s.replace('\\', '\\\\').replace('\t', '\\t').replace(
        '\r', '\\r').replace('\n', '\\n').replace('\"', '\\"')

    # Octal-escape any remaining C0 control characters (SOH..US).
    def repl(m):
        return "\\%03o" % ord(m.group(0))
    return re.sub('[\001-\037]', repl, s)
254
255
def safe_eval(s):
    """Evaluate the string token `s` with builtins disabled.

    Used to unwrap quoted string tokens from the tokenizer.  Hiding
    __builtins__ is a guard against accidents, not a security boundary.
    """
    return eval(s, {'__builtins__': {}}, {})
259
260
def normalize(s, escape):
    """Convert `s` into the form used for msgid lines in a PO file.

    A single-line string becomes one quoted line; a multi-line string
    becomes an empty-string first line followed by one quoted line per
    source line.  If `escape` is true, bytes >= 0200 are octal-escaped.
    """
    lines = s.split('\n')
    if len(lines) == 1:
        s = '"' + escape_unicode(s) + '"'
    else:
        if not lines[-1]:
            # Trailing newline: fold it back onto the last real line.
            del lines[-1]
            lines[-1] = lines[-1] + '\n'
        for i in range(len(lines)):
            lines[i] = escape_unicode(lines[i])
        lineterm = '\\n"\n"'
        s = '""\n"' + lineterm.join(lines) + '"'
    if isinstance(s, unicode):
        s = s.encode('utf-8')
    if escape:
        def repl(m):
            return "\\%03o" % ord(m.group(0))
        s = re.sub('[\200-\377]', repl, s)
    return s
282
283
def containsAny(str, set):
    """Check whether 'str' contains ANY of the chars in 'set'.

    NOTE: the parameter names shadow the builtins; they are kept for
    interface compatibility with existing callers.
    """
    # any() short-circuits; the original built a full list first.
    return any(c in str for c in set)
287
288
def _visit_pyfiles(list, dirname, names):
    """Helper for getFilesForName().

    os.path.walk() callback: appends the Python source files found in
    `dirname` to `list`, pruning version-control directories.
    """
    # Determine the extension for Python source files once, lazily.
    if '_py_ext' not in globals():
        global _py_ext
        _py_ext = [triple[0] for triple in imp.get_suffixes()
                   if triple[2] == imp.PY_SOURCE][0]

    # Don't recurse into version-control directories.
    if 'CVS' in names:
        names.remove('CVS')
    if '.svn' in names:
        names.remove('.svn')

    # Add all *.py files found in this directory.
    list.extend([os.path.join(dirname, file) for file in names
                 if os.path.splitext(file)[1] == _py_ext])
304
305
def _get_modpkg_path(dotted_name, pathlist=None):
    """Get the filesystem path for a module or a package.

    Return the file system path to a file for a module, and to a directory for
    a package. Return None if the name is not found, or is a builtin or
    extension module.
    """
    # Split off the top-most name component.
    parts = dotted_name.split('.', 1)

    if len(parts) > 1:
        # Dotted path: locate the top-level package first.
        try:
            file, pathname, description = imp.find_module(parts[0], pathlist)
            if file:
                file.close()
        except ImportError:
            return None

        # Recurse into the package only if it really is one.
        if description[2] == imp.PKG_DIRECTORY:
            pathname = _get_modpkg_path(parts[1], [pathname])
        else:
            pathname = None
    else:
        # Plain name: accept only source modules and package directories
        # (builtins and extension modules have no useful source path).
        try:
            file, pathname, description = imp.find_module(
                dotted_name, pathlist)
            if file:
                file.close()
            if description[2] not in [imp.PY_SOURCE, imp.PKG_DIRECTORY]:
                pathname = None
        except ImportError:
            pathname = None

    return pathname
344
345
def getFilesForName(name):
    """Get a list of module files for a filename, a module or package name,
    or a directory.
    """
    if not os.path.exists(name):
        # Not a path on disk: maybe a glob pattern?
        if containsAny(name, "*?[]"):
            files = glob.glob(name)
            list = []
            for file in files:
                list.extend(getFilesForName(file))
            return list

        # Otherwise try to resolve it as a module or package name.
        name = _get_modpkg_path(name)
        if not name:
            return []

    if os.path.isdir(name):
        # Collect all Python files under the directory.
        list = []
        os.path.walk(name, _visit_pyfiles, list)
        return list
    elif os.path.exists(name):
        # A single existing file.
        return [name]

    return []
374
375
def extract_genshi_strings(filename, options=None):
    """Extract translatable strings from a Genshi template.

    The extractor will get all the text inside all elements which are
    not in the ignore list (see options) and the values of all
    attributes named in the include list.

    Options:

    `ignore_tags` -- `'script style'`
        List of element names. Content inside elements named in
        this list is not extracted as translatable text. Can be a
        space-separated string or a list of string.
    `include_attrs` -- `'abbr alt label prompt standby summary title'`
        List of attribute names. Only values of the attributes named in this
        list are extracted as translatable text. Can be a space-separated
        string or a list of string.

    See http://genshi.edgewall.org/wiki/Documentation/0.5.x/i18n.html for
    more information.
    """
    if not GenshiMarkupTemplate:
        raise ImportError("Genshi templating is not installed.")

    if options is None:
        options = {}

    try:
        stream = GenshiMarkupTemplate(
            open(filename), filename=filename, filepath='.').stream
        translator = GenshiTranslator(**options)
        return translator.extract(stream)
    except Exception:
        # Re-raise, but name the offending template first for diagnosis.
        print >> sys.stderr, "Extracting from Genshi template", filename
        raise
413
414
class TokenEater:
    """Token-stream consumer that collects translatable strings.

    An instance is the callback passed to tokenize.tokenize(); it runs a
    small state machine over the token stream, recording strings wrapped
    in the configured keywords (and, optionally, docstrings).  Extra
    entry points handle Kid and Genshi templates.  Call write() at the
    end to emit the .pot output.
    """

    def __init__(self, options):
        self.__options = options
        self.__messages = {}          # msg -> {(filename, lineno): isdocstring}
        self.__state = self.__waiting
        self.__data = []              # string fragments of the current call
        self.__lineno = -1
        self.__freshmodule = 1        # still before the module docstring?
        self.__curfile = None
        self.__encoding = None

    def __call__(self, ttype, tstring, stup, etup, line):
        # Dispatch to the current state handler with the start line number.
        self.__state(ttype, tstring, stup[0])

    def __waiting(self, ttype, tstring, lineno):
        opts = self.__options
        # Do docstring extractions, if enabled.
        if opts.docstrings and not opts.nodocstrings.get(self.__curfile):
            # Module docstring?
            if self.__freshmodule:
                if ttype == tokenize.STRING:
                    self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
                    self.__freshmodule = 0
                elif ttype not in (tokenize.COMMENT, tokenize.NL):
                    self.__freshmodule = 0
                return
            # Class or function/method docstring?
            if ttype == tokenize.NAME and tstring in ('class', 'def'):
                self.__state = self.__suiteseen
                return
        if ttype == tokenize.NAME and tstring in opts.keywords:
            self.__state = self.__keywordseen

    def __suiteseen(self, ttype, tstring, lineno):
        # Ignore anything until we see the colon introducing the suite.
        if ttype == tokenize.OP and tstring == ':':
            self.__state = self.__suitedocstring

    def __suitedocstring(self, ttype, tstring, lineno):
        # The first string after the colon is the suite's docstring.
        if ttype == tokenize.STRING:
            self.__addentry(safe_eval(tstring), lineno, isdocstring=1)
            self.__state = self.__waiting
        elif ttype not in (tokenize.NEWLINE, tokenize.INDENT,
                           tokenize.COMMENT):
            # There was no docstring.
            self.__state = self.__waiting

    def __keywordseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == '(':
            self.__data = []
            self.__lineno = lineno
            self.__state = self.__openseen
        else:
            self.__state = self.__waiting

    def __openseen(self, ttype, tstring, lineno):
        if ttype == tokenize.OP and tstring == ',':
            # Only the first argument is the translatable string.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.OP and tstring == ')':
            # Matched the closing paren; adjacent string literals were
            # already joined into one entry.
            if self.__data:
                self.__addentry(EMPTYSTRING.join(self.__data))
            self.__state = self.__waiting
        elif ttype == tokenize.STRING:
            self.__data.append(safe_eval(tstring))
        elif ttype not in [tokenize.COMMENT, token.INDENT, token.DEDENT,
                           token.NEWLINE, tokenize.NL]:
            # Warn if we see anything other than strings and whitespace.
            print >> sys.stderr, _(
                '*** %(file)s:%(lineno)s: Seen unexpected token "%(token)s"'
                ) % {
                'token': tstring,
                'file': self.__curfile,
                'lineno': self.__lineno
                }
            self.__state = self.__waiting

    def __addentry(self, msg, lineno=None, isdocstring=0, istemplatestring=0):
        """The tokenize module always returns unicode strings even when they
        are in fact coded string instances. To deal with this we use a hack:
        evaluate string's representation without leading "u" to force
        interpretation as a coded string, then we decode it using the already
        known file encoding.
        """
        if not istemplatestring:
            if type(msg) is str:
                msg = eval(repr(msg))
            else:
                msg = eval(repr(msg)[1:])

            msg = msg.decode(self.__encoding)

        if lineno is None:
            lineno = self.__lineno

        if not msg in self.__options.toexclude:
            entry = (self.__curfile, lineno)
            self.__messages.setdefault(msg, {})[entry] = isdocstring

    def set_filename(self, filename):
        self.__curfile = filename
        self.__freshmodule = 1

    def set_file_encoding(self, fp):
        """Search for -*- coding: -*- magic comment to find out file encoding"""
        self.__encoding = 'utf-8'
        for line in fp.readlines()[:5]:
            m = re.match('#\s*-\*-\s+coding:\s+(\w+)\s+-\*-', line)
            if m:
                self.__encoding = m.group(1)
                break
        fp.seek(0)

    def __contains_inline_python(self, msg):
        # Kid inline expressions look like ${...}; $${ is the escaped form.
        return '${' in msg and not '$${' in msg

    def __strip_namespace_uri(self, tag):
        # '{http://...}tag' -> 'tag'
        return tag.split('}', 1)[-1]

    def extract_genshi_strings(self):
        """Extract translatable strings from a Genshi template.

        See the docstring of the eponymous module function for documentation.
        """
        if self.__curfile:
            # Delegate to the module-level extractor and record each
            # message it yields.
            for msg in extract_genshi_strings(self.__curfile):
                lineno, text = msg[0], msg[2]
                if text:
                    if isinstance(text, tuple):
                        for subtext in text:
                            if subtext:
                                self.__addentry(subtext, lineno,
                                                istemplatestring=1)
                    else:
                        self.__addentry(text, lineno, istemplatestring=1)

    def extract_kid_strings(self):
        """Extract translatable text nodes from a Kid template."""
        if not self.__curfile:
            return
        if not kid_parser:
            raise ImportError("Kid templating is not installed.")
        tag = None
        tags = []
        for ev, item in kid_parser.document(self.__curfile):
            if ev == kid_parser.TEXT:
                if tag:
                    item = item.strip()
                    if item and not self.__contains_inline_python(item):
                        self.__addentry(item, tag, istemplatestring=1)
            elif ev == kid_parser.START:
                tag = item.tag
                if isinstance(tag, basestring):
                    tag = self.__strip_namespace_uri(tag)
                    # Text inside script/style elements is never
                    # translatable.
                    if tag in ('script', 'style'):
                        tag = None
                else:
                    tag = None
                tags.append(tag)
            elif ev == kid_parser.END:
                if tags:
                    tag = tags.pop()

    def write(self, fp):
        """Emit the collected messages to `fp` in .pot format."""
        options = self.__options
        timestamp = time.strftime('%Y-%m-%d %H:%M')
        # The time stamp in the header doesn't have the same format as
        # that generated by xgettext...
        t = {'time': timestamp, 'version': __version__, 'charset': 'utf-8'}
        print >> fp, pot_header % t

        # Sort the entries: first each entry's own location keys, then
        # all entries grouped by identical location sets.
        reverse = {}
        for k, v in self.__messages.items():
            keys = v.keys()
            keys.sort()
            reverse.setdefault(tuple(keys), []).append((k, v))
        rkeys = reverse.keys()
        rkeys.sort()
        for rkey in rkeys:
            rentries = reverse[rkey]
            rentries.sort()
            for k, v in rentries:
                isdocstring = 0
                # If the entry was gleaned out of a docstring, then add a
                # comment stating so.
                if reduce(operator.__add__, v.values()):
                    isdocstring = 1
                # k is the message string, v is a dictionary-set of
                # (filename, lineno) tuples.
                v = v.keys()
                v.sort()
                if not options.writelocations:
                    pass
                elif options.locationstyle == options.SOLARIS:
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        print >> fp, _(
                            '# File: %(filename)s, line: %(lineno)s') % d
                elif options.locationstyle == options.GNU:
                    # Fit as many locations on one line as will fit within
                    # options.width.
                    locline = '#:'
                    for filename, lineno in v:
                        d = {'filename': filename, 'lineno': lineno}
                        s = _(' %(filename)s:%(lineno)s') % d
                        if len(locline) + len(s) <= options.width:
                            locline = locline + s
                        else:
                            print >> fp, locline
                            locline = "#:" + s
                    if len(locline) > 2:
                        print >> fp, locline
                if isdocstring:
                    print >> fp, '#, docstring'
                if k:
                    print >> fp, 'msgid', normalize(k, options.escape)
                    print >> fp, 'msgstr ""\n'
653
655 global default_keywords
656 try:
657 opts, args = getopt.getopt(
658 sys.argv[1:],
659 'ad:UDEhk:Kno:p:S:Vvw:x:X:',
660 ['extract-all', 'default-domain=', 'escape', 'help',
661 'keyword=', 'no-default-keywords',
662 'add-location', 'no-location', 'output=', 'output-dir=',
663 'style=', 'verbose', 'version', 'width=', 'exclude-file=',
664 'docstrings', 'no-docstrings', 'support-unicode',
665 ])
666 except getopt.error, msg:
667 usage(1, msg)
668
669
670 class Options:
671
672 GNU = 1
673 SOLARIS = 2
674
675 extractall = 0
676 escape = 0
677 keywords = []
678 outpath = ''
679 outfile = 'messages.pot'
680 writelocations = 1
681 locationstyle = GNU
682 verbose = 0
683 width = 78
684 excludefilename = ''
685 docstrings = 0
686 nodocstrings = {}
687
688 options = Options()
689 locations = {'gnu' : options.GNU,
690 'solaris' : options.SOLARIS,
691 }
692
693
694 for opt, arg in opts:
695 if opt in ('-h', '--help'):
696 usage(0)
697 elif opt in ('-a', '--extract-all'):
698 options.extractall = 1
699 elif opt in ('-d', '--default-domain'):
700 options.outfile = arg + '.pot'
701 elif opt in ('-E', '--escape'):
702 options.escape = 1
703 elif opt in ('-D', '--docstrings'):
704 options.docstrings = 1
705 elif opt in ('-k', '--keyword'):
706 options.keywords.append(arg)
707 elif opt in ('-K', '--no-default-keywords'):
708 default_keywords = []
709 elif opt in ('-n', '--add-location'):
710 options.writelocations = 1
711 elif opt in ('--no-location',):
712 options.writelocations = 0
713 elif opt in ('-S', '--style'):
714 options.locationstyle = locations.get(arg.lower())
715 if options.locationstyle is None:
716 usage(1, _('Invalid value for --style: %s') % arg)
717 elif opt in ('-o', '--output'):
718 options.outfile = arg
719 elif opt in ('-p', '--output-dir'):
720 options.outpath = arg
721 elif opt in ('-v', '--verbose'):
722 options.verbose = 1
723 elif opt in ('-V', '--version'):
724 print _('pygettext.py (xgettext for Python) %s') % __version__
725 sys.exit(0)
726 elif opt in ('-w', '--width'):
727 try:
728 options.width = int(arg)
729 except ValueError:
730 usage(1, _('--width argument must be an integer: %s') % arg)
731 elif opt in ('-x', '--exclude-file'):
732 options.excludefilename = arg
733 elif opt in ('-X', '--no-docstrings'):
734 fp = open(arg)
735 try:
736 while 1:
737 line = fp.readline()
738 if not line:
739 break
740 options.nodocstrings[line[:-1]] = 1
741 finally:
742 fp.close()
743
744
745 make_escapes(0)
746
747
748 options.keywords.extend(default_keywords)
749
750
751 if options.excludefilename:
752 try:
753 fp = open(options.excludefilename)
754 options.toexclude = fp.readlines()
755 fp.close()
756 except IOError:
757 print >> sys.stderr, _(
758 "Can't read --exclude-file: %s") % options.excludefilename
759 sys.exit(1)
760 else:
761 options.toexclude = []
762
763
764 expanded = []
765 for arg in args:
766 if arg == '-':
767 expanded.append(arg)
768 else:
769 expanded.extend(getFilesForName(arg))
770 args = expanded
771
772
773 eater = TokenEater(options)
774 for filename in args:
775 if filename == '-':
776 if options.verbose:
777 print _('Reading standard input')
778 fp = sys.stdin
779 closep = 0
780
781 else:
782 if options.verbose:
783 print _('Working on %s') % filename
784 fp = open(filename)
785 eater.set_file_encoding(fp)
786 closep = 1
787
788 try:
789 eater.set_filename(filename)
790 if os.path.splitext(filename)[-1].lower() == '.kid':
791 try:
792 eater.extract_kid_strings()
793 except Exception, e:
794 print >> sys.stderr, "Kid eater exception:", e
795
796 elif os.path.splitext(filename)[-1].lower() == '.html':
797 try:
798 eater.extract_genshi_strings()
799 except Exception, e:
800 print >> sys.stderr, "Genshi eater exception:", e
801
802 else:
803 try:
804 tokenize.tokenize(fp.readline, eater)
805 except tokenize.TokenError, e:
806 print >> sys.stderr, '%s: %s, line %d, column %d' % (
807 e[0], filename, e[1][0], e[1][1])
808
809 finally:
810 if closep:
811 fp.close()
812
813
814 if options.outfile == '-':
815 fp = sys.stdout
816 closep = 0
817
818 else:
819 if options.outpath:
820 options.outfile = os.path.join(options.outpath, options.outfile)
821
822 fp = open(options.outfile, 'w+')
823 closep = 1
824
825 try:
826 eater.write(fp)
827
828 finally:
829 if closep:
830 fp.close()
831
832
if __name__ == '__main__':
    main()
    # Some more test strings, for the benefit of running pygettext over
    # its own source:
    _(u'a unicode string')
    # This one triggers the "unexpected token" warning path.
    _('*** Seen unexpected token "%(token)s"') % {'token': 'test'}
    _('more' 'than' 'one' 'string')
840