Package translate :: Package filters :: Module prefilters
[hide private]
[frames | no frames]

Source Code for Module translate.filters.prefilters

  1  #!/usr/bin/env python 
  2  # -*- coding: utf-8 -*- 
  3  #  
  4  # Copyright 2004-2006 Zuza Software Foundation 
  5  #  
  6  # This file is part of translate. 
  7  # 
  8  # translate is free software; you can redistribute it and/or modify 
  9  # it under the terms of the GNU General Public License as published by 
 10  # the Free Software Foundation; either version 2 of the License, or 
 11  # (at your option) any later version. 
 12  #  
 13  # translate is distributed in the hope that it will be useful, 
 14  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
 16  # GNU General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with translate; if not, write to the Free Software 
 20  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA 
 21   
 22  """This is a set of string filters that strings can be passed through before 
 23  certain tests.""" 
 24   
 25  from translate.filters import decoration 
 26  from translate.misc import quote 
 27  import re 
 28   
def removekdecomments(str1):
    """Strip KDE-style PO comments from the front of str1.

    A KDE comment begins on a line that starts with ``_:`` and runs up to and
    including the first line whose stripped text ends with a literal
    backslash-n (the two characters ``\\n``, not a newline character).

    Scanning stops at the first non-blank line that is not part of such a
    comment, so only comments that precede the real text are removed.  Blank
    lines found outside a comment are kept.

    :param str1: the string to filter
    :return: str1 with the leading KDE comment lines removed
    """
    lines = str1.split("\n")
    keptlines = []
    iskdecomment = False
    for linenum, line in enumerate(lines):
        if line.startswith("_:"):
            iskdecomment = True
        if not iskdecomment:
            if line.strip():
                # first line of real text: keep it and everything after it
                keptlines.extend(lines[linenum:])
                break
            # blank line outside any comment is preserved
            keptlines.append(line)
            continue
        # inside a comment the line is dropped; a trailing literal "\n"
        # marks the last line of the comment
        if line.strip().endswith("\\n"):
            iskdecomment = False
    return "\n".join(keptlines)
# Module-level list handed to decoration.findaccelerators (as its third
# argument) each time a filter built by filteraccelerators runs.
# NOTE(review): presumably the accelerator characters to disregard - confirm
# against decoration.findaccelerators.
ignoreaccelerators = []


def filteraccelerators(accelmarker):
    """Build a string filter for accelerators marked with accelmarker.

    :param accelmarker: the accelerator marker string, or None (treated as a
        marker of length zero)
    :return: a function taking one string and returning the filtered string
    """
    accelmarkerlen = 0
    if accelmarker is not None:
        accelmarkerlen = len(accelmarker)

    def filtermarkedaccelerators(str1):
        """Rebuild str1 from the accelerators that decoration.findaccelerators
        reports: the text before each accelerator is copied, then the
        accelerator itself, and the scan resumes accelmarkerlen plus the
        accelerator's length past the reported start.
        """
        # the second element of the result is not needed here
        acclocs, _badlocs = decoration.findaccelerators(str1, accelmarker, ignoreaccelerators)
        filtered = ""
        cursor = 0
        for accelstart, accelerator in acclocs:
            filtered = filtered + str1[cursor:accelstart] + accelerator
            cursor = accelstart + accelmarkerlen + len(accelerator)
        return filtered + str1[cursor:]

    return filtermarkedaccelerators

def varname(variable, startmarker, endmarker):
    """Variable filter that keeps the variable name unchanged.

    The signature matches what filtervariables passes to its varfilter
    callback.  The markers are accepted but not used: the variable is returned
    exactly as received.

    The original implementation carried unreachable code after the return
    that would have cut startmarker/endmarker off the variable "if the
    punctuation were included"; it could never run and has been removed.

    :param variable: the variable name
    :param startmarker: the start marker (unused)
    :param endmarker: the end marker (unused)
    :return: variable, unchanged
    """
    return variable
76
def varnone(variable, startmarker, endmarker):
    """Variable filter that discards the variable entirely.

    The three arguments are accepted only so that the signature matches the
    other variable filter, varname; none of them is looked at.

    :return: always the empty string
    """
    return ""
80
def filtervariables(startmarker, endmarker, varfilter):
    """Build a string filter for variables marked with startmarker/endmarker.

    :param startmarker: the marker string that opens a variable, or None
    :param endmarker: the marker string that closes a variable, None, or an
        integer; None and integers contribute no characters when skipping
        past a variable
        (NOTE(review): an integer presumably has a special meaning to
        decoration.findmarkedvariables - confirm there)
    :param varfilter: callable invoked as
        varfilter(variable, startmarker, endmarker) whose return value is put
        in place of each variable (for example varname or varnone)
    :return: a function taking one string and returning the filtered string
    """
    if startmarker is None:
        startmarkerlen = 0
    else:
        startmarkerlen = len(startmarker)
    # isinstance rather than an exact type comparison, so int subclasses are
    # treated like plain integers instead of failing in len()
    if endmarker is None or isinstance(endmarker, int):
        endmarkerlen = 0
    else:
        endmarkerlen = len(endmarker)

    def filtermarkedvariables(str1):
        """Rebuild str1 with each variable reported by
        decoration.findmarkedvariables replaced by varfilter's result for it.
        """
        varlocs = decoration.findmarkedvariables(str1, startmarker, endmarker)
        pieces, pos = [], 0
        for varstart, variable in varlocs:
            pieces.append(str1[pos:varstart])
            pieces.append(varfilter(variable, startmarker, endmarker))
            # resume after the start marker, the variable and the end marker
            pos = varstart + startmarkerlen + len(variable) + endmarkerlen
        pieces.append(str1[pos:])
        return "".join(pieces)

    return filtermarkedvariables


# a list of special words with punctuation
# all apostrophes in the middle of the word are handled already
wordswithpunctuation = ["'n", "'t"]  # Afrikaans
# map all the words to their non-punctified equivalent
# ("".join keeps the values plain strings even where filter() returns an
# iterator instead of a string)
wordswithpunctuation = dict([(word, "".join(filter(str.isalnum, word))) for word in wordswithpunctuation])

def filterwordswithpunctuation(str1):
    """Remove the punctuation from words that are known to contain it.

    Two kinds of words are considered:

    - the keys of the module-level wordswithpunctuation mapping, located with
      quote.find_all and replaced by the mapped value;
    - any word with an apostrophe between word characters (``\\w+'\\w+``),
      replaced by its alphanumeric characters only.

    A candidate is only replaced when it stands alone: the character before
    it and the character after it must each be whitespace or a double quote.
    The start and the end of the string count as whitespace.

    :param str1: the string to filter
    :return: the filtered string, or str1 itself when nothing was replaced
    """
    occurrences = []
    for word, replacement in wordswithpunctuation.items():
        occurrences.extend([(pos, word, replacement) for pos in quote.find_all(str1, word)])
    for match in re.finditer(r"(?u)\w+'\w+", str1):
        word = match.group()
        # filtering per character and joining works the same way for byte
        # strings and unicode strings, so no type dispatch is needed
        replacement = "".join([char for char in word if char.isalnum()])
        occurrences.append((match.start(), word, replacement))
    occurrences.sort()

    pieces = []
    lastpos = 0
    for pos, word, replacement in occurrences:
        endpos = pos + len(word)
        # the string boundaries behave like surrounding whitespace
        if pos == 0:
            previouschar = " "
        else:
            previouschar = str1[pos - 1]
        if endpos == len(str1):
            nextchar = " "
        else:
            nextchar = str1[endpos]
        standsalone = ((previouschar.isspace() or previouschar == '"')
                       and (nextchar.isspace() or nextchar == '"'))
        if not standsalone:
            continue
        pieces.append(str1[lastpos:pos])
        pieces.append(replacement)
        lastpos = endpos

    if not pieces:
        return str1
    pieces.append(str1[lastpos:])
    return "".join(pieces)
145