Skip to content

Commit

Permalink
Merge pull request #206 from fintelia/simplify-jpeg
Browse files Browse the repository at this point in the history
Simplify and avoid allocation in jpeg decompression
  • Loading branch information
fintelia authored Jun 14, 2023
2 parents 473868a + 24d203c commit daa0e3e
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 105 deletions.
31 changes: 26 additions & 5 deletions src/decoder/image.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use super::ifd::{Directory, Value};
use super::stream::{ByteOrder, DeflateReader, JpegReader, LZWReader, PackBitsReader};
use super::stream::{ByteOrder, DeflateReader, LZWReader, PackBitsReader};
use super::tag_reader::TagReader;
use super::{fp_predict_f32, fp_predict_f64, DecodingBuffer, Limits};
use super::{stream::SmartReader, ChunkType};
Expand Down Expand Up @@ -356,7 +356,7 @@ impl Image {
photometric_interpretation: PhotometricInterpretation,
compression_method: CompressionMethod,
compressed_length: u64,
jpeg_tables: Option<Arc<Vec<u8>>>,
jpeg_tables: Option<&[u8]>,
) -> TiffResult<Box<dyn Read + 'r>> {
Ok(match compression_method {
CompressionMethod::None => Box::new(reader),
Expand All @@ -374,7 +374,29 @@ impl Image {
));
}

let jpeg_reader = JpegReader::new(reader, compressed_length, jpeg_tables)?;
// Construct new jpeg_reader wrapping a SmartReader.
//
// JPEG compression in TIFF allows saving quantization and/or huffman tables in one
// central location. These `jpeg_tables` are simply prepended to the remaining jpeg image data.
// Because these `jpeg_tables` start with a `SOI` (HEX: `0xFFD8`) or __start of image__ marker
// which is also at the beginning of the remaining JPEG image data and would
// confuse the JPEG renderer, one of these has to be taken off. In this case the first two
// bytes of the remaining JPEG data are removed because they follow `jpeg_tables`.
// Similarly, `jpeg_tables` ends with an `EOI` (HEX: `0xFFD9`) or __end of image__ marker,
// which has to be removed as well (last two bytes of `jpeg_tables`).
let jpeg_reader = match jpeg_tables {
Some(jpeg_tables) => {
let mut reader = reader.take(compressed_length);
reader.read_exact(&mut [0; 2])?;

Box::new(
Cursor::new(&jpeg_tables[..jpeg_tables.len() - 2])
.chain(reader.take(compressed_length)),
) as Box<dyn Read>
}
None => Box::new(reader.take(compressed_length)),
};

let mut decoder = jpeg::Decoder::new(jpeg_reader);

match photometric_interpretation {
Expand Down Expand Up @@ -582,13 +604,12 @@ impl Image {

let padding_right = chunk_dims.0 - data_dims.0;

let jpeg_tables = self.jpeg_tables.clone();
let mut reader = Self::create_reader(
reader,
photometric_interpretation,
compression_method,
*compressed_bytes,
jpeg_tables,
self.jpeg_tables.as_deref().map(|a| &**a),
)?;

if output_width == data_dims.0 as usize && padding_right == 0 {
Expand Down
101 changes: 1 addition & 100 deletions src/decoder/stream.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
//! All IO functionality needed for TIFF decoding
use std::convert::TryFrom;
use std::io::{self, BufRead, BufReader, Read, Seek, SeekFrom, Take};
use std::sync::Arc;
use std::io::{self, BufRead, BufReader, Read, Seek, Take};

/// Byte order of the TIFF file.
#[derive(Clone, Copy, Debug)]
Expand Down Expand Up @@ -183,104 +182,6 @@ impl<R: Read> Read for LZWReader<R> {
}
}

///
/// ## JPEG Reader (for "new-style" JPEG format (TIFF compression tag 7))
///
/// Presents one JPEG-compressed chunk as a single contiguous byte stream:
/// the shared `jpeg_tables` (minus their trailing `EOI` marker) followed by
/// the chunk's own data (minus its leading `SOI` marker).
pub(crate) struct JpegReader {
    /// Shared tables to emit before the chunk data, if any. The last two
    /// bytes (the `EOI` marker) are never handed out.
    jpeg_tables: Option<Arc<Vec<u8>>>,

    /// The chunk's compressed data, fully read into memory. When tables are
    /// present the cursor starts at position 2 so the duplicate `SOI` marker
    /// is skipped.
    buffer: io::Cursor<Vec<u8>>,

    /// Total number of bytes produced by `read` so far. Used to tell how much
    /// of `jpeg_tables` is still pending.
    offset: usize,
}

impl JpegReader {
    /// Constructs new JpegReader wrapping a SmartReader.
    ///
    /// Because JPEG compression in TIFF allows to save quantization and/or huffman tables in one
    /// central location, the constructor accepts this data as `jpeg_tables` here containing either
    /// or both.
    /// These `jpeg_tables` are simply prepended to the remaining jpeg image data.
    /// Because these `jpeg_tables` start with a `SOI` (HEX: `0xFFD8`) or __start of image__ marker
    /// which is also at the beginning of the remaining JPEG image data and would
    /// confuse the JPEG renderer, one of these has to be taken off. In this case the first two
    /// bytes of the remaining JPEG data are removed because they follow `jpeg_tables`.
    /// Similarly, `jpeg_tables` ends with an `EOI` (HEX: `0xFFD9`) or __end of image__ marker,
    /// which has to be removed as well (last two bytes of `jpeg_tables`).
    ///
    /// Exactly `length` bytes are read from `reader` before this function returns.
    ///
    /// # Errors
    ///
    /// * `InvalidData` if `jpeg_tables` is given and either the tables or `length` are
    ///   shorter than the two marker bytes that must be stripped.
    /// * `InvalidInput` if `length` does not fit into `usize` on this platform.
    /// * Any error produced by `reader` while reading `length` bytes (including
    ///   `UnexpectedEof` when fewer bytes are available).
    pub fn new<R: Read>(
        mut reader: R,
        length: u64,
        jpeg_tables: Option<Arc<Vec<u8>>>,
    ) -> io::Result<JpegReader> {
        // Both values originate from the file being decoded, so malformed
        // input is reported as an error instead of aborting the process.
        // Checking up front also avoids allocating for a chunk we will reject.
        if let Some(tables) = &jpeg_tables {
            if tables.len() < 2 {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!(
                        "jpeg_tables, if given, must be at least 2 bytes long, got {}",
                        tables.len()
                    ),
                ));
            }

            if length < 2 {
                return Err(io::Error::new(
                    io::ErrorKind::InvalidData,
                    format!(
                        "if jpeg_tables is given, length must be at least 2 bytes long, got {}",
                        length
                    ),
                ));
            }
        }

        // A plain `as` cast would silently truncate on targets where `usize`
        // is narrower than 64 bits.
        let segment_len = usize::try_from(length).map_err(|_| {
            io::Error::new(
                io::ErrorKind::InvalidInput,
                format!("jpeg segment length {} does not fit in memory", length),
            )
        })?;

        // Read jpeg image data
        let mut segment = vec![0; segment_len];
        reader.read_exact(&mut segment)?;

        let mut buffer = io::Cursor::new(segment);
        if jpeg_tables.is_some() {
            // Skip the first two bytes (the SOI marker already supplied by the tables)
            buffer.seek(SeekFrom::Start(2))?;
        }

        Ok(JpegReader {
            jpeg_tables,
            buffer,
            offset: 0,
        })
    }
}

impl Read for JpegReader {
    /// Fills `buf` first from the not-yet-emitted part of `jpeg_tables`
    /// (excluding its final two bytes) and then from the chunk data.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let mut filled = 0;

        if let Some(tables) = &self.jpeg_tables {
            // The trailing EOI marker of the tables is never emitted.
            let tables_end = tables.len().saturating_sub(2);

            if self.offset < tables_end {
                let pending = &tables[self.offset..tables_end];
                let to_copy = pending.len().min(buf.len());

                buf[..to_copy].copy_from_slice(&pending[..to_copy]);
                self.offset += to_copy;

                // The caller's buffer is full; the rest follows on the next call.
                if to_copy == buf.len() {
                    return Ok(to_copy);
                }

                filled = to_copy;
            }
        }

        // Tables are exhausted (or absent): continue with the chunk data.
        let read = self.buffer.read(&mut buf[filled..])?;
        self.offset += read;

        Ok(filled + read)
    }
}

///
/// ## PackBits Reader
///
Expand Down

0 comments on commit daa0e3e

Please sign in to comment.