1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 """Module for parsing Gettext .mo files for translation.
32
33 The coding of .mo files was produced from documentation in Gettext 0.16 and
34 from observation and testing of existing .mo files in the wild.
35
36 The class does not implement any of the hashing components of Gettext. This
37 will probably make the output file slower in some instances.
38 """
39
40 from translate.storage import base
41 from translate.storage import po
42 from translate.misc.multistring import multistring
43 import struct
44 import array
45 import re
46
# Magic number that opens every Gettext .mo file; compared against the first
# four bytes (read in both byte orders) to detect the file's endianness.
# Written without the Python-2-only ``L`` suffix: large hex literals are
# promoted to long automatically (PEP 237), so the value is unchanged.
MO_MAGIC_NUMBER = 0x950412de
48
# NOTE(review): the ``def`` line of this helper is absent from the extracted
# listing; the name and signature below are reconstructed from the body's use
# of ``filename`` -- confirm against the upstream file.
def mounpack(filename='messages.mo'):
    """Helper to unpack Gettext MO files into a Python string.

    Reads ``filename`` and prints its content as one line of ``\\xNN``
    escapes, one escape per byte, so the output can be pasted into a Python
    string literal.
    """
    # .mo files are binary: read them in binary mode so no newline
    # translation or text decoding can alter the bytes.
    f = open(filename, "rb")
    try:
        s = f.read()
    finally:
        # Release the handle even when the read fails.
        f.close()
    # bytearray yields integer byte values on both Python 2 and Python 3.
    print("\\x%02x" * len(s) % tuple(bytearray(s)))
55
class mounit(base.TranslationUnit):
    """A class representing a .mo translation message."""

    # NOTE(review): the extracted listing drops several lines right after the
    # class docstring (its embedded numbering jumps from 57 to 62).
    # Presumably a constructor that initialises ``msgctxt`` lived there; it is
    # deliberately not reconstructed here -- restore it from the upstream file.

    def getcontext(self):
        """Get the message context.

        Returns None when ``self.msgctxt`` is None, otherwise the parts of
        ``self.msgctxt`` joined into a single string.
        """
        if self.msgctxt is None:
            return None
        return "".join(self.msgctxt)

    # NOTE(review): the ``def`` lines of the two methods below are absent from
    # the extracted listing; their names are reconstructed from the docstrings
    # -- confirm against the upstream file.
    def isheader(self):
        """Is this a header entry?

        A unit counts as the header when its source is the empty string.
        """
        return self.source == ""

    def istranslatable(self):
        """Is this message translatable?

        Any unit with a non-empty source is considered translatable.
        """
        return bool(self.source)
77
class mofile(base.TranslationStore):
    """A class representing a .mo file."""
    UnitClass = mounit

    # NOTE(review): the extracted listing drops several lines at this point
    # (its embedded numbering jumps from 80 to 88). Presumably a constructor
    # lived there; it is deliberately not reconstructed here -- restore it
    # from the upstream file.

    # NOTE(review): the ``def`` lines of both methods below are absent from
    # the extracted listing; the signatures are reconstructed from the
    # docstrings and from the names the bodies use (``self``, ``input``) --
    # confirm against the upstream file.
    def __str__(self):
        """Output a string representation of the MO data file.

        Only units with a non-empty target are written. The result is laid
        out as: a header of seven 32-bit integers, the table of
        (length, offset) pairs for the keys, the same table for the values,
        then the NUL-terminated keys followed by the NUL-terminated values.
        No hash table is written (size and offset are both 0). Integers are
        packed in the machine's native byte order.
        """
        messages = {}
        for unit in self.units:
            if not unit.target:
                # Untranslated units are not stored in the file.
                continue
            if isinstance(unit.source, multistring):
                # Plural forms are separated by NUL inside one entry.
                source = "".join(unit.msgidcomments) + "\0".join(unit.source.strings)
            else:
                source = "".join(unit.msgidcomments) + unit.source
            if unit.msgctxt:
                # The context is prefixed to the key, separated by EOT.
                source = "".join(unit.msgctxt) + "\x04" + source
            if isinstance(unit.target, multistring):
                target = "\0".join(unit.target.strings)
            else:
                target = unit.target
            messages[source.encode("utf-8")] = target

        # The entries are written in sorted key order.
        keys = sorted(messages)

        # For each entry record (key offset, key length, value offset, value
        # length). Every string is NUL terminated; the NUL is not counted in
        # the length but does advance the running offset.
        entries = []
        idparts = []
        strparts = []
        idpos = strpos = 0
        for key in keys:
            value = messages[key]
            if isinstance(value, unicode):
                value = value.encode('utf-8')
            entries.append((idpos, len(key), strpos, len(value)))
            idparts.append(key + '\0')
            strparts.append(value + '\0')
            idpos += len(key) + 1
            strpos += len(value) + 1
        ids = ''.join(idparts)
        strs = ''.join(strparts)

        # The header is seven 32-bit integers and each of the two index
        # tables holds two integers per entry, so the keys start right after
        # them and the values start right after the keys.
        keystart = 7*4 + 16*len(keys)
        valuestart = keystart + len(ids)

        # Each index entry is the string length followed by its file offset.
        koffsets = []
        voffsets = []
        for koff, klen, voff, vlen in entries:
            koffsets.extend((klen, koff + keystart))
            voffsets.extend((vlen, voff + valuestart))
        offsets = koffsets + voffsets

        header = struct.pack("Iiiiiii",
                             MO_MAGIC_NUMBER,       # magic
                             0,                     # version
                             len(keys),             # number of entries
                             7*4,                   # start of key index
                             7*4 + len(keys)*8,     # start of value index
                             0, 0)                  # size and offset of hash table
        # Native-order C ints, byte-for-byte what array.array("i", ...) held.
        index = struct.pack("%di" % len(offsets), *offsets)
        return header + index + ids + strs

    def parse(self, input):
        """Parse the given file or file source string.

        ``input`` is either the raw content of a .mo file or an object with
        a ``read`` method, which is read completely and then closed. Every
        entry found is added to the store as a unit; the header entry (empty
        source) is additionally searched for a ``charset=`` declaration that
        sets the encoding used for the entries that follow it.

        Raises ValueError when the data is too short to hold an MO header,
        does not start with the MO magic number in either byte order, or
        declares a version greater than 1.
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            try:
                mosrc = input.read()
            finally:
                # Close the source even when reading it fails.
                input.close()
            input = mosrc

        headersize = 7*4
        if len(input) < headersize:
            # Too short to contain the header: report it the same way as a
            # bad magic number rather than leaking a struct.error.
            raise ValueError("This is not an MO file")

        # The magic number tells us which byte order the file was written in.
        little, = struct.unpack("<L", input[:4])
        big, = struct.unpack(">L", input[:4])
        if little == MO_MAGIC_NUMBER:
            endian = "<"
        elif big == MO_MAGIC_NUMBER:
            endian = ">"
        else:
            raise ValueError("This is not an MO file")
        magic, version, lenkeys, startkey, startvalue, sizehash, offsethash = struct.unpack("%sLiiiiii" % endian, input[:headersize])
        if version > 1:
            raise ValueError("Unable to process MO files with versions > 1. This is a %d version MO file" % version)

        encoding = 'UTF-8'
        pairformat = "%sii" % endian
        for i in range(lenkeys):
            # Each index entry is a (length, offset) pair of 32-bit integers.
            nextkey = startkey + (i*2*4)
            nextvalue = startvalue + (i*2*4)
            klength, koffset = struct.unpack(pairformat, input[nextkey:nextkey+(2*4)])
            vlength, voffset = struct.unpack(pairformat, input[nextvalue:nextvalue+(2*4)])
            source = input[koffset:koffset+klength]
            rawtarget = input[voffset:voffset+vlength]
            context = None
            if "\x04" in source:
                # Only the first EOT separates the context from the message;
                # splitting once keeps any later EOT inside the message
                # instead of failing to unpack.
                context, source = source.split("\x04", 1)
            # Plural forms are separated by NUL inside one entry.
            source = multistring(source.split("\0"), encoding=encoding)
            if source == "":
                # Header entry: pick up the declared charset, if any.
                charset = re.search("charset=([^\\s]+)", rawtarget)
                if charset:
                    encoding = po.encodingToUse(charset.group(1))
            target = multistring(rawtarget.split("\0"), encoding=encoding)
            newunit = mounit(source)
            newunit.settarget(target)
            if context is not None:
                newunit.msgctxt.append(context)
            self.addunit(newunit)
193