Coverage for nexusLIMS/extractors/plugins/preview_generators/text_preview.py: 100%
86 statements
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
« prev ^ index » next coverage.py v7.11.3, created at 2026-03-24 05:23 +0000
1"""Text file preview generator."""
3import logging
4import textwrap
5from pathlib import Path
6from typing import ClassVar, Union
8import matplotlib.pyplot as plt
9from matplotlib.figure import Figure
10from PIL import Image
12from nexusLIMS.extractors.base import ExtractionContext
# Module-level logger, named after this module
_logger = logging.getLogger(__name__)

# Resampling filter used when the rendered thumbnail is rescaled
_LANCZOS = Image.Resampling.LANCZOS

# Layout limits for the rendered text preview.
# Row cap applied to note-style (wrapped) text
_MAX_ROWS_NOTE = 18
# Row cap applied to data-style (truncated) text
_MAX_ROWS_DATA = 17
# Column cap applied to every displayed line
_MAX_COLS = 44
# Side length of the preview figure, in inches
_DEFAULT_SIZE = 5
def _pad_to_square(im_path: Path, new_width: int = 500):
    """
    Pad an image to square.

    Helper method to pad an image saved on disk to a square with size
    ``new_width x new_width``. This ensures consistent display on the
    front-end web page. The image is first scaled so that its longest side
    equals ``new_width`` (aspect ratio preserved) and is then centered on a
    new RGBA canvas of the target size. The original image is overwritten.

    Method adapted from:
    https://jdhao.github.io/2017/11/06/resize-image-to-square-with-padding/

    Parameters
    ----------
    im_path
        The path to the image that should be resized/padded
    new_width
        Desired output width/height of the image (in pixels)
    """
    # Open inside a context manager so the file handle is released before the
    # very same path is overwritten by ``save`` below.
    with Image.open(im_path) as image:
        old_size = image.size  # (width, height)
        ratio = float(new_width) / max(old_size)
        # Clamp each side to at least 1 px: for an extreme aspect ratio the
        # short side would otherwise truncate to 0, which is not a valid size.
        new_size = tuple(max(1, int(x * ratio)) for x in old_size)
        resized = image.resize(new_size, _LANCZOS)

    # NOTE(review): the canvas is RGBA, so the file format implied by
    # ``im_path`` presumably has to support an alpha channel -- confirm that
    # callers never pass a JPEG path here.
    new_im = Image.new("RGBA", (new_width, new_width))
    # Center the resized image on the square canvas
    new_im.paste(
        resized,
        ((new_width - new_size[0]) // 2, (new_width - new_size[1]) // 2),
    )
    new_im.save(im_path)
def _decode_text(raw_bytes: bytes, f: Path) -> Union[str, None]:
    """Decode *raw_bytes* with the first candidate encoding that succeeds.

    Returns the decoded string, or ``None`` when no candidate encoding can
    decode the bytes. *f* is only used for log messages.
    """
    # Encodings in order of preference
    for encoding in ("utf-8", "windows-1250", "windows-1252"):
        try:
            content = raw_bytes.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            continue
        _logger.debug("Successfully decoded %s with %s encoding", f, encoding)
        return content
    return None


def _format_preview_text(content: str) -> str:
    """Reduce *content* to the lines/columns that fit in the preview.

    Text with more than ``_MAX_ROWS_NOTE`` newlines is treated as data and
    truncated; anything else is treated as a note and word-wrapped.
    """
    # Threshold to distinguish between data (many newlines) and notes (few
    # newlines); notes are displayed in at most _MAX_ROWS_NOTE rows.
    if content.count("\n") > _MAX_ROWS_NOTE:
        # Data mode: first _MAX_COLS characters of first _MAX_ROWS_DATA lines.
        # ``maxsplit`` avoids splitting the remainder of a large file; the
        # unsplit tail (if any) is dropped by the slice.
        lines = content.split("\n", _MAX_ROWS_DATA)[:_MAX_ROWS_DATA]
        return "\n".join(line[:_MAX_COLS] for line in lines)

    # Note mode: wrap to _MAX_COLS columns, up to _MAX_ROWS_NOTE rows
    wrapper = textwrap.TextWrapper(width=_MAX_COLS)
    wrapped_lines = []
    for line in content.split("\n"):
        if line.strip():  # Non-empty lines
            wrapped_lines.extend(wrapper.wrap(line))
        else:  # Preserve empty lines
            wrapped_lines.append("")
    return "\n".join(wrapped_lines[:_MAX_ROWS_NOTE])


def text_to_thumbnail(
    f: Path,
    out_path: Path,
    output_size: int = 500,
) -> Union[Figure, bool]:
    """
    Generate a preview thumbnail from a text file.

    For a text file, the contents will be formatted and rendered onto a
    ``_DEFAULT_SIZE`` x ``_DEFAULT_SIZE`` inch (5 in by 5 in) figure, which is
    saved as a square image of ``output_size`` pixels per side (500 by
    default).

    If the text file has many newlines (more than ``_MAX_ROWS_NOTE``), it is
    probably data and the first ``_MAX_COLS`` (44) characters of each of the
    first ``_MAX_ROWS_DATA`` (17) lines of the text file will be written to
    the image.

    If the text file has a few (or fewer) newlines, it is probably a manually
    generated note and the text will be wrapped into a ``_MAX_COLS`` (44)
    column, ``_MAX_ROWS_NOTE`` (18) row box until the space is exhausted.

    Note that this function closes all open pyplot figures and sets the
    ``image.cmap`` rcParam to ``"gray"`` as a side effect.

    Parameters
    ----------
    f
        The path of a text file for which a thumbnail should be generated.
    out_path
        A path to the desired thumbnail filename. All formats supported by
        :py:meth:`~matplotlib.figure.Figure.savefig` can be used.
    output_size : int
        The pixel width (and height, since the image is padded to square) of
        the saved image file.

    Returns
    -------
    :py:class:`matplotlib.figure.Figure` or bool
        Handle to the (already closed) matplotlib Figure, or the value False
        if the file could not be read or decoded, or the image could not be
        saved.
    """
    plt.close("all")
    plt.rcParams["image.cmap"] = "gray"

    # Best-effort read: any failure here means "no preview", not a crash
    try:
        raw_bytes = f.read_bytes()
    except Exception as e:
        _logger.warning("Failed to read text file %s: %s", f, e)
        return False

    content = _decode_text(raw_bytes, f)
    if content is None:
        _logger.warning(
            "Failed to decode text file %s with any supported encoding", f
        )
        return False

    # Drop a leading byte-order mark so it is neither counted as a column nor
    # drawn as a glyph
    if content.startswith("\ufeff"):
        content = content[1:]

    # Normalize line endings (CRLF/CR to LF) for consistent handling
    content = content.replace("\r\n", "\n").replace("\r", "\n")

    # Expand tabs to spaces (tabs can render as black squares in matplotlib)
    content = content.expandtabs(tabsize=4)

    formatted_text = _format_preview_text(content)

    # Escape every ``$`` so matplotlib never sees an unescaped pair and never
    # switches to mathtext. Backslashes are intentionally NOT doubled:
    # outside of mathtext matplotlib draws them literally, so doubling them
    # would show e.g. ``C:\\path`` instead of ``C:\path``.
    formatted_text = formatted_text.replace("$", r"\$")

    # Create a matplotlib figure with no frame
    dpi = output_size / _DEFAULT_SIZE
    fig = plt.figure(figsize=(_DEFAULT_SIZE, _DEFAULT_SIZE), dpi=dpi)

    # The ``finally`` guarantees the figure is closed even if rendering
    # raises; rendering errors still propagate to the caller as before.
    try:
        plt.axis("off")

        # Add the text to the figure, monospaced and anchored top-left.
        # DejaVu Sans Mono is requested explicitly instead of the generic
        # "monospace" family.
        fig.text(
            0.02,
            0.97,
            formatted_text,
            fontfamily="DejaVu Sans Mono",
            fontsize=12,
            verticalalignment="top",
            horizontalalignment="left",
            usetex=False,
            linespacing=1.7,  # Increase line spacing (default is 1.2)
        )

        fig.tight_layout()

        # Save the figure, then pad the file on disk to a square
        try:
            fig.savefig(out_path, dpi=dpi)
            _pad_to_square(out_path, output_size)
        except Exception as e:
            _logger.warning("Failed to save text thumbnail to %s: %s", out_path, e)
            return False
    finally:
        plt.close(fig)

    return fig
class TextPreviewGenerator:
    """
    Preview generator for text files.

    This generator creates thumbnail previews of text files by rendering
    the first few lines of text as an image.
    """

    # Identifier of this generator
    name = "text_preview"
    # Priority value of this generator
    # NOTE(review): presumably used by a plugin registry for ordering -- the
    # ordering semantics are not visible in this module.
    priority = 100
    # File extensions (lowercase, without the leading dot) handled here
    supported_extensions: ClassVar = {"txt"}

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this generator supports the given file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if the file extension (case-insensitive) is listed in
            ``supported_extensions``
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        # Consult the class attribute so the declared extensions and the
        # actual check cannot drift apart
        return extension in self.supported_extensions

    def generate(self, context: ExtractionContext, output_path: Path) -> bool:
        """
        Generate a thumbnail preview from a text file.

        Parameters
        ----------
        context
            The extraction context containing file information
        output_path
            Path where the preview image should be saved

        Returns
        -------
        bool
            True if preview was successfully generated, False otherwise
        """
        try:
            _logger.debug("Generating text preview for: %s", context.file_path)

            # Generate the thumbnail using the local function
            result = text_to_thumbnail(
                context.file_path,
                output_path,
                output_size=500,
            )

            # text_to_thumbnail reports failure by returning False; honoring
            # it prevents a stale file already present at output_path from
            # being reported as a freshly generated preview.
            return result is not False and output_path.exists()
        except Exception as e:
            _logger.warning(
                "Failed to generate text preview for %s: %s",
                context.file_path,
                e,
            )
            return False