mirror of
				https://github.com/google/brotli
				synced 2025-10-30 07:50:02 +08:00 
			
		
		
		
	add max_length to Python streaming decompression
This commit is contained in:
		
							
								
								
									
										192
									
								
								python/_brotli.c
									
									
									
									
									
								
							
							
						
						
									
										192
									
								
								python/_brotli.c
									
									
									
									
									
								
							| @ -606,57 +606,6 @@ static PyTypeObject brotli_CompressorType = { | ||||
|   brotli_Compressor_new,                 /* tp_new */ | ||||
| }; | ||||
|  | ||||
| static PyObject* decompress_stream(BrotliDecoderState* dec, | ||||
|                                    uint8_t* input, size_t input_length) { /* Decodes all of input with dec into a growable output buffer; returns what BlocksOutputBuffer_Finish produces, or NULL on failure. */ | ||||
|   BrotliDecoderResult result; | ||||
|  | ||||
|   size_t available_in = input_length; | ||||
|   const uint8_t* next_in = input; | ||||
|  | ||||
|   size_t available_out; | ||||
|   uint8_t* next_out; | ||||
|   BlocksOutputBuffer buffer = {.list=NULL}; | ||||
|   PyObject *ret; | ||||
|  | ||||
|   if (BlocksOutputBuffer_InitAndGrow(&buffer, PY_SSIZE_T_MAX, &available_out, &next_out) < 0) { /* PY_SSIZE_T_MAX is passed as the length limit, i.e. no caller-chosen cap. */ | ||||
|     goto error; | ||||
|   } | ||||
|  | ||||
|   while (1) { | ||||
|     Py_BEGIN_ALLOW_THREADS /* The decoder call below runs with the GIL released. */ | ||||
|     result = BrotliDecoderDecompressStream(dec, | ||||
|                                            &available_in, &next_in, | ||||
|                                            &available_out, &next_out, NULL); | ||||
|     Py_END_ALLOW_THREADS | ||||
|  | ||||
|     if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { | ||||
|       if (available_out == 0) { /* Grow only once the current output space is completely used. */ | ||||
|         if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) { | ||||
|           goto error; | ||||
|         } | ||||
|       } | ||||
|       continue; | ||||
|     } | ||||
|  | ||||
|     break; /* Any result other than NEEDS_MORE_OUTPUT ends the loop. */ | ||||
|   } | ||||
|  | ||||
|   if (result == BROTLI_DECODER_RESULT_ERROR || available_in != 0) { /* Input left over after the decoder stopped is treated like a decoder error. */ | ||||
|     goto error; | ||||
|   } | ||||
|  | ||||
|   ret = BlocksOutputBuffer_Finish(&buffer, available_out); | ||||
|   if (ret != NULL) { /* Success skips the cleanup below; a NULL result falls through into it. */ | ||||
|     goto finally; | ||||
|   } | ||||
|  | ||||
| error: | ||||
|   BlocksOutputBuffer_OnError(&buffer); | ||||
|   ret = NULL; | ||||
| finally: | ||||
|   return ret; | ||||
| } | ||||
|  | ||||
| PyDoc_STRVAR(brotli_Decompressor_doc, | ||||
| "An object to decompress a byte string.\n" | ||||
| "\n" | ||||
| @ -669,10 +618,14 @@ PyDoc_STRVAR(brotli_Decompressor_doc, | ||||
| typedef struct { /* Instance data of the Python Decompressor object. */ | ||||
|   PyObject_HEAD | ||||
|   BrotliDecoderState* dec; /* Decoder instance; destroyed in brotli_Decompressor_dealloc. */ | ||||
|   uint8_t* unconsumed_data; /* Heap copy of input the decoder has not consumed yet, or NULL; freed in dealloc. */ | ||||
|   size_t unconsumed_data_length; /* Number of bytes held in unconsumed_data; 0 when nothing is pending. */ | ||||
| } brotli_Decompressor; | ||||
|  | ||||
| static void brotli_Decompressor_dealloc(brotli_Decompressor* self) { | ||||
|   BrotliDecoderDestroyInstance(self->dec); | ||||
|   if (self->unconsumed_data) | ||||
|     free(self->unconsumed_data); | ||||
|   #if PY_MAJOR_VERSION >= 3 | ||||
|   Py_TYPE(self)->tp_free((PyObject*)self); | ||||
|   #else | ||||
| @ -688,6 +641,9 @@ static PyObject* brotli_Decompressor_new(PyTypeObject *type, PyObject *args, PyO | ||||
|     self->dec = BrotliDecoderCreateInstance(0, 0, 0); | ||||
|   } | ||||
|  | ||||
|   self->unconsumed_data = NULL; | ||||
|   self->unconsumed_data_length = 0; | ||||
|  | ||||
|   return (PyObject *)self; | ||||
| } | ||||
|  | ||||
| @ -706,6 +662,79 @@ static int brotli_Decompressor_init(brotli_Decompressor *self, PyObject *args, P | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| /* Runs the decoder over `input` and collects at most `max_output_length` | ||||
|  * bytes of output. | ||||
|  * | ||||
|  * Input the decoder has not consumed when the function returns is copied into | ||||
|  * self->unconsumed_data (replacing any earlier tail) so that a later call can | ||||
|  * resume from it. `input` may itself be self->unconsumed_data. | ||||
|  * | ||||
|  * Returns the object produced by BlocksOutputBuffer_Finish, or NULL on | ||||
|  * failure. MemoryError is set here when the output buffer has hit | ||||
|  * PY_SSIZE_T_MAX or when the tail copy cannot be allocated; a decoder error | ||||
|  * returns NULL without setting an exception in this function. */ | ||||
| static PyObject* decompress_stream(brotli_Decompressor* self, | ||||
|                                    uint8_t* input, size_t input_length, Py_ssize_t max_output_length) { | ||||
|   BrotliDecoderResult result; | ||||
|  | ||||
|   size_t available_in = input_length; | ||||
|   const uint8_t* next_in = input; | ||||
|  | ||||
|   size_t available_out; | ||||
|   uint8_t* next_out; | ||||
|   uint8_t* new_tail = NULL; | ||||
|   BlocksOutputBuffer buffer = {.list=NULL}; | ||||
|   PyObject *ret; | ||||
|  | ||||
|   if (BlocksOutputBuffer_InitAndGrow(&buffer, max_output_length, &available_out, &next_out) < 0) { | ||||
|     goto error; | ||||
|   } | ||||
|  | ||||
|   while (1) { | ||||
|     Py_BEGIN_ALLOW_THREADS | ||||
|     result = BrotliDecoderDecompressStream(self->dec, | ||||
|                                            &available_in, &next_in, | ||||
|                                            &available_out, &next_out, NULL); | ||||
|     Py_END_ALLOW_THREADS | ||||
|  | ||||
|     if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) { | ||||
|       if (available_out == 0) { | ||||
|         if (buffer.allocated == PY_SSIZE_T_MAX) { | ||||
|           PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); | ||||
|           goto error; | ||||
|         } | ||||
|         if (buffer.allocated == max_output_length) { | ||||
|           /* We've reached the output length limit; input that is still | ||||
|            * unconsumed is saved at `finally`. */ | ||||
|           break; | ||||
|         } | ||||
|         if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) { | ||||
|           goto error; | ||||
|         } | ||||
|       } | ||||
|       continue; | ||||
|     } | ||||
|  | ||||
|     if (result == BROTLI_DECODER_RESULT_ERROR || available_in != 0) { | ||||
|       /* Nothing is kept for a later call after a decoding failure. */ | ||||
|       available_in = 0; | ||||
|       goto error; | ||||
|     } | ||||
|  | ||||
|     break; | ||||
|   } | ||||
|  | ||||
|   ret = BlocksOutputBuffer_Finish(&buffer, available_out); | ||||
|   if (ret != NULL) { | ||||
|     goto finally; | ||||
|   } | ||||
|  | ||||
| error: | ||||
|   BlocksOutputBuffer_OnError(&buffer); | ||||
|   ret = NULL; | ||||
|  | ||||
| finally: | ||||
|   /* Copy the tail before releasing the old one: next_in may point into | ||||
|    * self->unconsumed_data when the caller replays a saved tail. */ | ||||
|   if (available_in > 0) { | ||||
|     new_tail = malloc(available_in); | ||||
|     if (new_tail == NULL) { | ||||
|       /* The unconsumed input cannot be kept, so the call must not report | ||||
|        * success; previously memcpy would have written through NULL. */ | ||||
|       if (ret != NULL) { | ||||
|         Py_DECREF(ret); | ||||
|         ret = NULL; | ||||
|         PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); | ||||
|       } | ||||
|       available_in = 0; | ||||
|     } else { | ||||
|       memcpy(new_tail, next_in, available_in); | ||||
|     } | ||||
|   } | ||||
|   free(self->unconsumed_data);  /* free(NULL) is a no-op */ | ||||
|   self->unconsumed_data = new_tail; | ||||
|   self->unconsumed_data_length = available_in; | ||||
|  | ||||
|   return ret; | ||||
| } | ||||
|  | ||||
|  | ||||
| PyDoc_STRVAR(brotli_Decompressor_process_doc, | ||||
| "Process \"string\" for decompression, returning a string that contains \n" | ||||
| "decompressed output data.  This data should be concatenated to the output \n" | ||||
| @ -713,28 +742,38 @@ PyDoc_STRVAR(brotli_Decompressor_process_doc, | ||||
| "Some or all of the input may be kept in internal buffers for later \n" | ||||
| "processing, and the decompressed output data may be empty until enough input \n" | ||||
| "has been accumulated.\n" | ||||
| "If max_output_length is set, no more than max_output_length bytes will be\n" | ||||
| "returned. If the limit is reached, further calls to process (potentially with\n" | ||||
| "empty input) will continue to yield more data. If, after returning a string of\n" | ||||
| "the length equal to limit, can_accept_more_data() returns False, process()\n" | ||||
| "must only be called with empty input until can_accept_more_data() once again\n" | ||||
| "returns True.\n" | ||||
| "\n" | ||||
| "Signature:\n" | ||||
| "  decompress(string)\n" | ||||
| "  decompress(string, max_output_length=int)\n" | ||||
| "\n" | ||||
| "Args:\n" | ||||
| "  string (bytes): The input data\n" | ||||
| "\n" | ||||
| "Returns:\n" | ||||
| "\n""Returns:\n" | ||||
| "  The decompressed output data (bytes)\n" | ||||
| "\n" | ||||
| "Raises:\n" | ||||
| "  brotli.error: If decompression fails\n"); | ||||
|  | ||||
| static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args) { | ||||
| static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args, PyObject* keywds) { | ||||
|   PyObject* ret; | ||||
|   Py_buffer input; | ||||
|   int ok; | ||||
|   Py_ssize_t max_output_length = PY_SSIZE_T_MAX; | ||||
|   uint8_t* data; | ||||
|   size_t data_length; | ||||
|  | ||||
|   static char* kwlist[] = { "", "max_output_length", NULL }; | ||||
|  | ||||
| #if PY_MAJOR_VERSION >= 3 | ||||
|   ok = PyArg_ParseTuple(args, "y*:process", &input); | ||||
|   ok = PyArg_ParseTupleAndKeywords(args, keywds, "y*|n:process", kwlist, &input, &max_output_length); | ||||
| #else | ||||
|   ok = PyArg_ParseTuple(args, "s*:process", &input); | ||||
|   ok = PyArg_ParseTupleAndKeywords(args, keywds, "s*|n:process", kwlist, &input, &max_output_length); | ||||
| #endif | ||||
|  | ||||
|   if (!ok) { | ||||
| @ -745,7 +784,20 @@ static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject | ||||
|     goto error; | ||||
|   } | ||||
|  | ||||
|   ret = decompress_stream(self->dec, (uint8_t*) input.buf, input.len); | ||||
|   if (self->unconsumed_data_length > 0) { | ||||
|     if (input.len > 0) { | ||||
|       PyErr_SetString(BrotliError, "process called with data when accept_more_data is False"); | ||||
|       ret = NULL; | ||||
|       goto finally; | ||||
|     } | ||||
|     data = self->unconsumed_data; | ||||
|     data_length = self->unconsumed_data_length; | ||||
|   } else { | ||||
|     data = (uint8_t*)input.buf; | ||||
|     data_length = input.len; | ||||
|   } | ||||
|  | ||||
|   ret = decompress_stream(self, data, data_length, max_output_length); | ||||
|   if (ret != NULL) { | ||||
|     goto finally; | ||||
|   } | ||||
| @ -787,13 +839,35 @@ static PyObject* brotli_Decompressor_is_finished(brotli_Decompressor *self) { | ||||
|   } | ||||
| } | ||||
|  | ||||
| /* Docstring for Decompressor.can_accept_more_data(). */ | ||||
| PyDoc_STRVAR(brotli_Decompressor_can_accept_more_data_doc, | ||||
| "Checks if the decoder instance can accept more compressed data. If the process()\n" | ||||
| "method on this instance of decompressor was never called with max_output_length,\n" | ||||
| "this method will always return True.\n" | ||||
| "\n" | ||||
| "Signature:\n" | ||||
| "  can_accept_more_data()\n" | ||||
| "\n" | ||||
| "Returns:\n" | ||||
| "  True  if the decoder is ready to accept more compressed data via process()\n" | ||||
| "  False if the decoder needs to output some data via process(b'') before\n" | ||||
| "        being provided any more compressed data\n"); | ||||
|  | ||||
| /* Implements Decompressor.can_accept_more_data(): returns True when no | ||||
|  * unconsumed input is buffered on the object, False otherwise. */ | ||||
| static PyObject* brotli_Decompressor_can_accept_more_data(brotli_Decompressor* self) { | ||||
|   if (self->unconsumed_data_length == 0) { | ||||
|     Py_RETURN_TRUE; | ||||
|   } | ||||
|   /* Input from an earlier call is still pending. */ | ||||
|   Py_RETURN_FALSE; | ||||
| } | ||||
|  | ||||
| static PyMemberDef brotli_Decompressor_members[] = { /* Member table of the Decompressor type; it lists no members, only the terminator. */ | ||||
|   {NULL}  /* Sentinel */ | ||||
| }; | ||||
|  | ||||
| static PyMethodDef brotli_Decompressor_methods[] = { | ||||
|   {"process", (PyCFunction)brotli_Decompressor_process, METH_VARARGS, brotli_Decompressor_process_doc}, | ||||
|   {"process", (PyCFunction)brotli_Decompressor_process, METH_VARARGS | METH_KEYWORDS, brotli_Decompressor_process_doc}, | ||||
|   {"is_finished", (PyCFunction)brotli_Decompressor_is_finished, METH_NOARGS, brotli_Decompressor_is_finished_doc}, | ||||
|   {"can_accept_more_data", (PyCFunction)brotli_Decompressor_can_accept_more_data, METH_NOARGS, brotli_Decompressor_can_accept_more_data_doc}, | ||||
|   {NULL}  /* Sentinel */ | ||||
| }; | ||||
|  | ||||
|  | ||||
| @ -4,6 +4,7 @@ | ||||
| # See file LICENSE for detail or copy at https://opensource.org/licenses/MIT | ||||
|  | ||||
| import functools | ||||
| import os | ||||
| import unittest | ||||
|  | ||||
| from . import _test_utils | ||||
| @ -39,10 +40,51 @@ class TestDecompressor(_test_utils.TestCase): | ||||
|                     out_file.write(self.decompressor.process(data)) | ||||
|         self.assertTrue(self.decompressor.is_finished()) | ||||
|  | ||||
|     def _decompress_with_limit(self, test_data, max_output_length): | ||||
|         temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data) | ||||
|         with open(temp_uncompressed, 'wb') as out_file: | ||||
|             with open(test_data, 'rb') as in_file: | ||||
|                 chunk_iter = iter(functools.partial(in_file.read, 10 * 1024), b'') | ||||
|                 while not self.decompressor.is_finished(): | ||||
|                     data = b'' | ||||
|                     if self.decompressor.can_accept_more_data(): | ||||
|                         data = next(chunk_iter, b'') | ||||
|                     decompressed_data = self.decompressor.process(data, max_output_length=max_output_length) | ||||
|                     self.assertTrue(len(decompressed_data) <= max_output_length) | ||||
|                     out_file.write(decompressed_data) | ||||
|                 self.assertTrue(next(chunk_iter, None) == None) | ||||
|  | ||||
|     def _test_decompress(self, test_data): | ||||
|         self._decompress(test_data) | ||||
|         self._check_decompression(test_data) | ||||
|  | ||||
|     def _test_decompress_with_limit(self, test_data): | ||||
|         self._decompress_with_limit(test_data, max_output_length=20) | ||||
|         self._check_decompression(test_data) | ||||
|  | ||||
|     def test_too_much_input(self): | ||||
|         with open(os.path.join(_test_utils.TESTDATA_DIR, "zerosukkanooa.compressed"), 'rb') as in_file: | ||||
|             compressed = in_file.read() | ||||
|             self.decompressor.process(compressed[:-1], max_output_length=1) | ||||
|             # the following assertion checks whether the test setup is correct | ||||
|             self.assertTrue(not self.decompressor.can_accept_more_data()) | ||||
|             with self.assertRaises(brotli.error): | ||||
|                 self.decompressor.process(compressed[-1:]) | ||||
|  | ||||
|     def test_changing_limit(self): | ||||
|         test_data = os.path.join(_test_utils.TESTDATA_DIR, "zerosukkanooa.compressed") | ||||
|         temp_uncompressed = _test_utils.get_temp_uncompressed_name(test_data) | ||||
|         with open(temp_uncompressed, 'wb') as out_file: | ||||
|             with open(test_data, 'rb') as in_file: | ||||
|                 compressed = in_file.read() | ||||
|                 uncompressed = self.decompressor.process(compressed[:-1], max_output_length=1) | ||||
|                 self.assertTrue(len(uncompressed) <= 1) | ||||
|                 out_file.write(uncompressed) | ||||
|                 while not self.decompressor.can_accept_more_data(): | ||||
|                     out_file.write(self.decompressor.process(b'')) | ||||
|                 out_file.write(self.decompressor.process(compressed[-1:])) | ||||
|         self._check_decompression(test_data) | ||||
|  | ||||
|     def test_garbage_appended(self): | ||||
|         with self.assertRaises(brotli.error): | ||||
|             self.decompressor.process(brotli.compress(b'a') + b'a') | ||||
|  | ||||
							
								
								
									
										
											BIN
										
									
								
								tests/testdata/zerosukkanooa
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								tests/testdata/zerosukkanooa
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
							
								
								
									
										
											BIN
										
									
								
								tests/testdata/zerosukkanooa.compressed
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								tests/testdata/zerosukkanooa.compressed
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							
		Reference in New Issue
	
	Block a user
	 Robert Obryk
					Robert Obryk