1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
"""Module for parsing Qt .qm files

@note: based on documentation from Gettext's .qm implementation
(see write-qt.c) and on observation of the output of lrelease.
@note: Certain deprecated section tags are not implemented. These will break
and print out the missing tag. They are easy to implement and should follow
the structure in 03 (Translation). We could find no examples that use these,
so we'd rather leave them unimplemented until we actually have test data.
@note: Many .qm files cannot be parsed because they do not contain the source
text. We assume that since they use a hash table to look up the data there is
actually no need for the source text. It seems however that in Qt4's lrelease
all data is included in the resultant .qm file.
@todo: We can only parse, not create, a .qm file. The main issue is that we
need to implement the hashing algorithm (which seems to be identical to the
Gettext hash algorithm). Unlike Gettext it seems that the hash is required,
but that has not been validated.
@todo: The code can parse files correctly. But it could be cleaned up to be
more readable, especially the part that breaks the file into sections.

U{http://qt.gitorious.org/+kde-developers/qt/kde-qt/blobs/master/tools/linguist/shared/qm.cpp}
U{Plural information<http://qt.gitorious.org/+kde-developers/qt/kde-qt/blobs/master/tools/linguist/shared/numerus.cpp>}
U{QLocale languages<http://docs.huihoo.com/qt/4.5/qlocale.html#Language-enum>}
"""
46
47 import codecs
48 import struct
49 import sys
50
51 from translate.storage import base
52 from translate.misc.multistring import multistring
53
# Signature expected in the first 16 bytes of a .qm file, expressed as the four
# big-endian unsigned 32-bit words that struct.unpack(">4L", ...) yields.
# Written without the Python-2-only "L" suffix: the values are identical on
# Python 2 (ints promote to long automatically) and the literal stays valid
# syntax on Python 3.
QM_MAGIC_NUMBER = (0x3CB86418, 0xCAEF9C95, 0xCD211CBF, 0x60A1BDDD)
55
56
def qmunpack(file_='messages.qm'):
    """Helper to unpack Qt .qm files into a Python string

    Reads the whole file and prints every byte as a ``\\xNN`` escape on a
    single line, so the output can be pasted into a Python string literal.

    @param file_: path of the file to dump
    """
    # .qm data is binary: read it in binary mode so no newline translation or
    # text decoding is applied, and let the context manager close the handle
    # even when read() fails.
    with open(file_, 'rb') as f:
        s = f.read()
    # bytearray() yields integers on both Python 2 and Python 3, unlike
    # iterating the raw read() result.
    print("\\x%02x" * len(s) % tuple(bytearray(s)))
63
64
class qmunit(base.TranslationUnit):
    """A single translation message read from a Qt .qm file."""
67
70
71
class qmfile(base.TranslationStore):
    """A class representing a .qm file.

    Only reading is implemented: L{parse} fills the store with units, while
    L{savefile} always raises and C{__str__} returns an empty string.
    """
    UnitClass = qmunit
    Name = _("Qt .qm file")
    Mimetypes = ["application/x-qm"]
    Extensions = ["qm"]
    _binary = True

    def __str__(self):
        """Output a string representation of the .qm data file"""
        # Serialising .qm data is not implemented, so there is nothing to emit.
        return ""

    def parse(self, input):
        """parses the given file or file source string

        The data must start with the 16-byte .qm magic number. It is followed
        by sections, each introduced by a 5-byte header (1-byte type, 4-byte
        big-endian length). Only the messages section (0x69) is decoded; every
        message that ends with both a source and a translation is added to the
        store through C{addsourceunit}.

        @param input: the raw .qm data, or an object with a C{read()} method
            returning it. A readable object is closed after being read, and
            its C{name} attribute (if any) becomes C{self.filename}.
        @raise ValueError: if the data is shorter than 16 bytes, does not
            start with the magic number, or a message ends without both a
            source text and a translation.
        @raise struct.error: if a section or a field inside the messages
            section is truncated.
        @note: When a subsection tag is not implemented, a message is written
            to stderr and parsing stops; units added so far are kept.
        """
        if hasattr(input, 'name'):
            self.filename = input.name
        elif not getattr(self, 'filename', ''):
            self.filename = ''
        if hasattr(input, "read"):
            qmsrc = input.read()
            input.close()
            input = qmsrc
        if len(input) < 16:
            raise ValueError("This is not a .qm file: file empty or too small")
        magic = struct.unpack(">4L", input[:16])
        if magic != QM_MAGIC_NUMBER:
            raise ValueError("This is not a .qm file: invalid magic number")

        # Pass 1: walk the section headers and remember where the messages
        # section lives. If several are present the last one wins.
        sectionheader = 5
        startsection = 16
        messages_start = None
        messages_length = 0
        while startsection < len(input):
            section_type, length = struct.unpack(
                ">bL", input[startsection:startsection + sectionheader])
            data_start = startsection + sectionheader
            # 0x42: hashes, 0x69: messages, 0x2f: contexts
            if section_type in (0x42, 0x69, 0x2f):
                if len(input) - data_start < length:
                    # A truncated payload of a known section has always been
                    # reported as struct.error; keep that exception type.
                    raise struct.error(
                        "truncated .qm section: expected %d bytes" % length)
                if section_type == 0x69:
                    messages_start = data_start
                    messages_length = length
            startsection = data_start + length

        if messages_start is None:
            # No messages section at all: nothing to add to the store.
            return

        # Pass 2: decode the tagged fields inside the messages section.
        pos = messages_start
        messages_end = messages_start + messages_length
        source = target = None
        while pos < messages_end:
            subsection, = struct.unpack(">b", input[pos:pos + 1])
            pos = pos + 1
            if subsection == 0x01:
                # End of one message: flush it into a unit.
                if source is None or target is None:
                    raise ValueError("Old .qm format with no source defined")
                newunit = self.addsourceunit(source)
                newunit.target = target
                source = target = None
                continue

            # Every other tag is followed by a 4-byte big-endian signed value
            # (for the hash tag those four bytes are the hash itself).
            length, = struct.unpack(">l", input[pos:pos + 4])
            if subsection == 0x03:
                # Translation: UTF-16BE text; a length of -1 marks no text.
                if length != -1:
                    raw, = struct.unpack(">%ds" % length,
                                         input[pos + 4:pos + 4 + length])
                    string = codecs.utf_16_be_decode(raw)[0]
                    if target:
                        # Further translations of the same message are
                        # appended to the existing multistring.
                        target.strings.append(string)
                    else:
                        target = multistring(string)
                    pos = pos + 4 + length
                else:
                    target = ""
                    pos = pos + 4
            elif subsection == 0x06:
                # Source text, decoded as ISO-8859-1.
                source = input[pos + 4:pos + 4 + length].decode('iso-8859-1')
                pos = pos + 4 + length
            elif subsection in (0x07, 0x08):
                # Context (0x07) and comment (0x08) are not stored on the
                # unit; skip over their payload.
                pos = pos + 4 + length
            elif subsection == 0x05:
                # Hash: not needed for reading, skip its four bytes.
                pos = pos + 4
            else:
                if subsection == 0x02:
                    subsection_name = "SourceText16"
                elif subsection == 0x04:
                    subsection_name = "Context16"
                else:
                    subsection_name = "Unkown"
                sys.stderr.write("Unimplemented: %s %s\n" %
                                 (subsection, subsection_name))
                return

    def savefile(self, storefile):
        """Always raises: writing .qm files is not implemented.

        @param storefile: unused
        @raise Exception: always
        """
        raise Exception("Writing of .qm files is not supported yet")
187