Line data Source code
1 : /*
2 : Samba Unix SMB/CIFS implementation.
3 :
4 : Python bindings for compression functions.
5 :
6 : Copyright (C) Petr Viktorin 2015
7 : Copyright (C) Douglas Bagnall 2022
8 :
9 : ** NOTE! The following LGPL license applies to the talloc
10 : ** library. This does NOT imply that all of Samba is released
11 : ** under the LGPL
12 :
13 : This library is free software; you can redistribute it and/or
14 : modify it under the terms of the GNU Lesser General Public
15 : License as published by the Free Software Foundation; either
16 : version 3 of the License, or (at your option) any later version.
17 :
18 : This library is distributed in the hope that it will be useful,
19 : but WITHOUT ANY WARRANTY; without even the implied warranty of
20 : MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 : Lesser General Public License for more details.
22 :
23 : You should have received a copy of the GNU Lesser General Public
24 : License along with this library; if not, see <http://www.gnu.org/licenses/>.
25 : */
26 :
27 : #include "includes.h"
28 : #include <talloc.h>
29 : #include <Python.h>
30 : #include "lzxpress.h"
31 : #include "lzxpress_huffman.h"
32 :
/*
 * Exception type raised by the functions in this file when compression or
 * decompression fails. It starts out NULL and is filled out in module init,
 * where it is also published as the module attribute "CompressionError".
 */
static PyObject *CompressionError = NULL;
35 :
36 0 : static PyObject *plain_compress(PyObject *mod, PyObject *args)
37 : {
38 0 : uint8_t *src = NULL;
39 : Py_ssize_t src_len;
40 0 : char *dest = NULL;
41 : Py_ssize_t dest_len;
42 0 : PyObject *dest_obj = NULL;
43 : size_t alloc_len;
44 : int ret;
45 :
46 0 : if (!PyArg_ParseTuple(args, "s#", &src, &src_len)) {
47 0 : return NULL;
48 : }
49 :
50 : /*
51 : * 9/8 + 4 is the worst case growth, but we add room.
52 : *
53 : * alloc_len can't overflow as src_len is ssize_t while alloc_len is
54 : * size_t.
55 : */
56 0 : alloc_len = src_len + src_len / 8 + 500;
57 :
58 0 : dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len);
59 0 : if (dest_obj == NULL) {
60 0 : return NULL;
61 : }
62 0 : dest = PyBytes_AS_STRING(dest_obj);
63 :
64 0 : dest_len = lzxpress_compress(src,
65 : src_len,
66 : (uint8_t *)dest,
67 : alloc_len);
68 0 : if (dest_len < 0) {
69 0 : PyErr_SetString(CompressionError, "unable to compress data");
70 0 : Py_DECREF(dest_obj);
71 0 : return NULL;
72 : }
73 :
74 0 : ret = _PyBytes_Resize(&dest_obj, dest_len);
75 0 : if (ret != 0) {
76 : /*
77 : * Don't try to free dest_obj, as we're in deep MemoryError
78 : * territory here.
79 : */
80 0 : return NULL;
81 : }
82 0 : return dest_obj;
83 : }
84 :
85 :
86 0 : static PyObject *plain_decompress(PyObject *mod, PyObject *args)
87 : {
88 0 : uint8_t *src = NULL;
89 : Py_ssize_t src_len;
90 0 : char *dest = NULL;
91 : Py_ssize_t dest_len;
92 0 : PyObject *dest_obj = NULL;
93 0 : Py_ssize_t alloc_len = 0;
94 0 : Py_ssize_t given_len = 0;
95 : int ret;
96 :
97 0 : if (!PyArg_ParseTuple(args, "s#|n", &src, &src_len, &given_len)) {
98 0 : return NULL;
99 : }
100 0 : if (given_len != 0) {
101 : /*
102 : * With plain decompression, we don't *need* the exact output
103 : * size (as we do with LZ77+Huffman), but it certainly helps
104 : * when guessing the size.
105 : */
106 0 : alloc_len = given_len;
107 0 : } else if (src_len > UINT32_MAX) {
108 : /*
109 : * The underlying decompress function will reject this, but by
110 : * checking here we can give a better message and be clearer
111 : * about overflow risks.
112 : *
113 : * Note, the limit is actually the smallest of UINT32_MAX and
114 : * SSIZE_MAX, but src_len is ssize_t so it already can't
115 : * exceed that.
116 : */
117 0 : PyErr_Format(CompressionError,
118 : "The maximum size for compressed data is 4GB "
119 : "cannot decompress %zu bytes.", src_len);
120 : } else {
121 : /*
122 : * The data can expand massively (though not beyond the
123 : * 4GB limit) so we guess a big number for small inputs
124 : * (we expect small inputs), and a relatively conservative
125 : * number for big inputs.
126 : */
127 0 : if (src_len <= 3333333) {
128 0 : alloc_len = 10000000;
129 0 : } else if (src_len > UINT32_MAX / 3) {
130 0 : alloc_len = UINT32_MAX;
131 : } else {
132 0 : alloc_len = src_len * 3;
133 : }
134 : }
135 :
136 0 : dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len);
137 0 : if (dest_obj == NULL) {
138 0 : return NULL;
139 : }
140 0 : dest = PyBytes_AS_STRING(dest_obj);
141 :
142 0 : dest_len = lzxpress_decompress(src,
143 : src_len,
144 : (uint8_t *)dest,
145 : alloc_len);
146 0 : if (dest_len < 0) {
147 0 : if (alloc_len == given_len) {
148 0 : PyErr_Format(CompressionError,
149 : "unable to decompress data into a buffer "
150 : "of %zd bytes.", alloc_len);
151 : } else {
152 0 : PyErr_Format(CompressionError,
153 : "unable to decompress data into a buffer "
154 : "of %zd bytes. If you know the length, "
155 : "supply it as the second argument.",
156 : alloc_len);
157 : }
158 0 : Py_DECREF(dest_obj);
159 0 : return NULL;
160 : }
161 :
162 0 : ret = _PyBytes_Resize(&dest_obj, dest_len);
163 0 : if (ret != 0) {
164 : /*
165 : * Don't try to free dest_obj, as we're in deep MemoryError
166 : * territory here.
167 : */
168 0 : return NULL;
169 : }
170 0 : return dest_obj;
171 : }
172 :
173 :
174 :
175 0 : static PyObject *huffman_compress(PyObject *mod, PyObject *args)
176 : {
177 0 : uint8_t *src = NULL;
178 : Py_ssize_t src_len;
179 0 : char *dest = NULL;
180 : Py_ssize_t dest_len;
181 0 : PyObject *dest_obj = NULL;
182 : size_t alloc_len;
183 : int ret;
184 : struct lzxhuff_compressor_mem cmp_mem;
185 :
186 0 : if (!PyArg_ParseTuple(args, "s#", &src, &src_len)) {
187 0 : return NULL;
188 : }
189 : /*
190 : * worst case is roughly 256 per 64k or less.
191 : *
192 : * alloc_len won't overflow as src_len is ssize_t while alloc_len is
193 : * size_t.
194 : */
195 0 : alloc_len = src_len + src_len / 8 + 500;
196 :
197 0 : dest_obj = PyBytes_FromStringAndSize(NULL, alloc_len);
198 0 : if (dest_obj == NULL) {
199 0 : return NULL;
200 : }
201 0 : dest = PyBytes_AS_STRING(dest_obj);
202 :
203 0 : dest_len = lzxpress_huffman_compress(&cmp_mem,
204 : src,
205 : src_len,
206 : (uint8_t *)dest,
207 : alloc_len);
208 0 : if (dest_len < 0) {
209 0 : PyErr_SetString(CompressionError, "unable to compress data");
210 0 : Py_DECREF(dest_obj);
211 0 : return NULL;
212 : }
213 :
214 0 : ret = _PyBytes_Resize(&dest_obj, dest_len);
215 0 : if (ret != 0) {
216 0 : return NULL;
217 : }
218 0 : return dest_obj;
219 : }
220 :
221 :
222 0 : static PyObject *huffman_decompress(PyObject *mod, PyObject *args)
223 : {
224 0 : uint8_t *src = NULL;
225 : Py_ssize_t src_len;
226 0 : char *dest = NULL;
227 : Py_ssize_t dest_len;
228 0 : PyObject *dest_obj = NULL;
229 0 : Py_ssize_t given_len = 0;
230 : /*
231 : * Here it is always necessary to supply the exact length.
232 : */
233 :
234 0 : if (!PyArg_ParseTuple(args, "s#n", &src, &src_len, &given_len)) {
235 0 : return NULL;
236 : }
237 :
238 0 : dest_obj = PyBytes_FromStringAndSize(NULL, given_len);
239 0 : if (dest_obj == NULL) {
240 0 : return NULL;
241 : }
242 0 : dest = PyBytes_AS_STRING(dest_obj);
243 :
244 0 : dest_len = lzxpress_huffman_decompress(src,
245 : src_len,
246 : (uint8_t *)dest,
247 : given_len);
248 0 : if (dest_len != given_len) {
249 0 : PyErr_Format(CompressionError,
250 : "unable to decompress data into a %zd bytes.",
251 : given_len);
252 0 : Py_DECREF(dest_obj);
253 0 : return NULL;
254 : }
255 : /* no resize here */
256 0 : return dest_obj;
257 : }
258 :
259 :
260 : static PyMethodDef mod_methods[] = {
261 : { "plain_compress", (PyCFunction)plain_compress, METH_VARARGS,
262 : "compress bytes using lzxpress plain compression"},
263 : { "plain_decompress", (PyCFunction)plain_decompress, METH_VARARGS,
264 : "decompress lzxpress plain compressed bytes"},
265 : { "huffman_compress", (PyCFunction)huffman_compress, METH_VARARGS,
266 : "compress bytes using lzxpress plain compression"},
267 : { "huffman_decompress", (PyCFunction)huffman_decompress, METH_VARARGS,
268 : "decompress lzxpress plain compressed bytes"},
269 : {0}
270 : };
271 :
272 :
273 : #define MODULE_DOC PyDoc_STR("LZXpress compresssion/decompression bindings")
274 :
275 : static struct PyModuleDef moduledef = {
276 : PyModuleDef_HEAD_INIT,
277 : .m_name = "compression",
278 : .m_doc = MODULE_DOC,
279 : .m_size = -1,
280 : .m_methods = mod_methods,
281 : };
282 :
283 :
284 81 : static PyObject *module_init(void)
285 : {
286 81 : PyObject *m = PyModule_Create(&moduledef);
287 81 : if (m == NULL) {
288 0 : return NULL;
289 : }
290 :
291 81 : CompressionError = PyErr_NewException(
292 : "compression.CompressionError",
293 : PyExc_Exception,
294 : NULL);
295 81 : PyModule_AddObject(m, "CompressionError", CompressionError);
296 :
297 81 : return m;
298 : }
299 :
300 : PyMODINIT_FUNC PyInit_compression(void);
301 81 : PyMODINIT_FUNC PyInit_compression(void)
302 : {
303 81 : return module_init();
304 : }
|