Coverage for nexusLIMS/extractors/plugins/preview_generators/text_preview.py

1"""Text file preview generator."""

3import logging

4import textwrap

5from pathlib import Path

6from typing import ClassVar, Union

8import matplotlib.pyplot as plt

9from matplotlib.figure import Figure

10from PIL import Image

12from nexusLIMS.extractors.base import ExtractionContext

# Module-level logger; every warning/debug message in this module goes here.
_logger = logging.getLogger(__name__)

# Resampling filter used by ``_pad_to_square`` when scaling the saved image.
_LANCZOS = Image.Resampling.LANCZOS

# Constants for text preview formatting
# Maximum rendered rows for note-style text. ``text_to_thumbnail`` also uses
# this as the newline-count threshold above which a file is treated as data.
_MAX_ROWS_NOTE = 18
# Maximum rendered rows for data-style text
_MAX_ROWS_DATA = 17
# Maximum columns per rendered row (wrap width for notes, cut-off for data)
_MAX_COLS = 44
# Figure edge length in inches; the dpi is derived as ``output_size / _DEFAULT_SIZE``
_DEFAULT_SIZE = 5

def _pad_to_square(im_path: Path, new_width: int = 500):
    """
    Pad an image to square.

    Helper method to pad an image saved on disk to a square with size
    ``new_width x new_width``. This ensures consistent display on the
    front-end web page. The image is first scaled so that its longest side
    equals ``new_width``; the shorter dimension is then centered on an empty
    (transparent) canvas. The original image is overwritten.

    Method adapted from:
    https://jdhao.github.io/2017/11/06/resize-image-to-square-with-padding/

    Parameters
    ----------
    im_path
        The path to the image that should be resized/padded
    new_width
        Desired output width/height of the image (in pixels)

    Notes
    -----
    The output canvas is created in ``RGBA`` mode, so ``im_path`` presumably
    needs an extension whose format can store an alpha channel (e.g. PNG) --
    TODO confirm against the callers' output paths.
    """
    # Use a context manager so the source file handle is released before the
    # same path is overwritten below (the original left it open).
    with Image.open(im_path) as image:
        old_size = image.size  # (width, height)
        ratio = float(new_width) / max(old_size)
        # Clamp to 1 px: with an extreme aspect ratio the short side would
        # otherwise truncate to 0 and make ``resize`` fail.
        new_size = tuple(max(1, int(x * ratio)) for x in old_size)
        resized = image.resize(new_size, _LANCZOS)

    new_im = Image.new("RGBA", (new_width, new_width))
    # Center the scaled image on the square canvas.
    new_im.paste(
        resized,
        ((new_width - new_size[0]) // 2, (new_width - new_size[1]) // 2),
    )
    new_im.save(im_path)

def _decode_text_bytes(raw_bytes: bytes, f: Path):
    """Decode ``raw_bytes`` with the first encoding that works, else return None."""
    # Encodings in order of preference
    for encoding in ("utf-8", "windows-1250", "windows-1252"):
        try:
            content = raw_bytes.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            continue
        _logger.debug("Successfully decoded %s with %s encoding", f, encoding)
        # A UTF-8 byte-order mark decodes to U+FEFF; drop it so it is not
        # rendered as a stray glyph at the start of the preview.
        if content.startswith("\ufeff"):
            content = content[1:]
        return content
    return None


def _format_preview_text(content: str) -> str:
    """Reduce ``content`` to the rows/columns that fit in the preview image."""
    # Normalize line endings (CRLF/CR to LF) for consistent handling
    content = content.replace("\r\n", "\n").replace("\r", "\n")

    # Expand tabs to spaces (tabs can render as black squares in matplotlib)
    content = content.expandtabs(tabsize=4)

    # Many newlines -> data file; few newlines -> hand-written note.
    # _MAX_ROWS_NOTE is the threshold since notes are displayed in that many rows
    is_data = content.count("\n") > _MAX_ROWS_NOTE

    if is_data:
        # Data mode: first _MAX_COLS characters of first _MAX_ROWS_DATA lines
        lines = content.split("\n")[:_MAX_ROWS_DATA]
        formatted_text = "\n".join(line[:_MAX_COLS] for line in lines)
    else:
        # Note mode: wrap to _MAX_COLS columns, keep up to _MAX_ROWS_NOTE rows
        wrapper = textwrap.TextWrapper(width=_MAX_COLS)
        wrapped_lines = []
        for line in content.split("\n"):
            if line.strip():  # Non-empty lines
                wrapped_lines.extend(wrapper.wrap(line))
            else:  # Preserve empty lines
                wrapped_lines.append("")
        formatted_text = "\n".join(wrapped_lines[:_MAX_ROWS_NOTE])

    # Escape every "$" so matplotlib never sees an unescaped pair and treats
    # the text as mathtext. Backslashes are deliberately NOT doubled: for
    # non-math text matplotlib only turns "\$" back into "$" and draws every
    # other backslash literally, so doubling them would show "\\" for each
    # "\" in the file.
    return formatted_text.replace("$", r"\$")


def text_to_thumbnail(
    f: Path,
    out_path: Path,
    output_size: int = 500,
) -> Union[Figure, bool]:
    """
    Generate a preview thumbnail from a text file.

    For a text file, the contents will be formatted and rendered with
    matplotlib on a ``_DEFAULT_SIZE`` (5 in) square figure, saved to
    ``out_path`` and padded to ``output_size x output_size`` pixels
    (500x500 by default).

    If the text file has more than ``_MAX_ROWS_NOTE`` (18) newlines, it is
    probably data and the first ``_MAX_COLS`` (44) characters of each of the
    first ``_MAX_ROWS_DATA`` (17) lines of the text file will be written to
    the image.

    If the text file has that many (or fewer) newlines, it is probably a
    manually generated note and the text will be wrapped to ``_MAX_COLS``
    (44) columns and written until ``_MAX_ROWS_NOTE`` (18) rows are used.

    The file is decoded as UTF-8 if possible, falling back to windows-1250
    and then windows-1252.

    Parameters
    ----------
    f
        The path of a text file for which a thumbnail should be generated.
    out_path
        A path to the desired thumbnail filename. The figure is written with
        :py:meth:`~matplotlib.figure.Figure.savefig` and then re-saved by
        ``_pad_to_square``, so the format must be supported by both.
    output_size : int
        The pixel width (and height, since the image is padded to square) of
        the saved image file.

    Returns
    -------
    f : :py:class:`matplotlib.figure.Figure` or bool
        Handle to the (already closed) matplotlib Figure, or the value False
        if the file could not be read/decoded or the image could not be saved

    Notes
    -----
    As a side effect this closes all open pyplot figures and sets
    ``plt.rcParams["image.cmap"]`` to ``"gray"``.
    """
    plt.close("all")
    plt.rcParams["image.cmap"] = "gray"

    try:
        content = _decode_text_bytes(f.read_bytes(), f)
    except Exception as e:
        _logger.warning("Failed to read text file %s: %s", f, e)
        return False

    if content is None:
        _logger.warning(
            "Failed to decode text file %s with any supported encoding", f
        )
        return False

    formatted_text = _format_preview_text(content)

    # Create a matplotlib figure with no frame
    fig = plt.figure(
        figsize=(_DEFAULT_SIZE, _DEFAULT_SIZE),
        dpi=output_size / _DEFAULT_SIZE,
    )

    # try/finally guarantees the figure is released from pyplot's registry
    # even when rendering raises (such exceptions still propagate).
    try:
        plt.axis("off")

        # Add the text to the figure, monospace and left-aligned at the top.
        # DejaVu Sans Mono is used for better Unicode support than the
        # generic monospace family.
        fig.text(
            0.02,
            0.97,
            formatted_text,
            fontfamily="DejaVu Sans Mono",
            fontsize=12,
            verticalalignment="top",
            horizontalalignment="left",
            usetex=False,
            linespacing=1.7,  # Increase line spacing (default is 1.2)
        )

        fig.tight_layout()

        # Save the figure, then pad the file on disk to a square
        try:
            fig.savefig(out_path, dpi=output_size / _DEFAULT_SIZE)
            _pad_to_square(out_path, output_size)
        except Exception as e:
            _logger.warning("Failed to save text thumbnail to %s: %s", out_path, e)
            return False
    finally:
        plt.close(fig)

    return fig

195

196

class TextPreviewGenerator:
    """
    Preview generator for text files.

    This generator creates thumbnail previews of text files by rendering
    the first few lines of text as an image.
    """

    # Identifier and priority of this generator; how they are consumed is
    # decided by the plugin machinery outside this module.
    name = "text_preview"
    priority = 100
    # Lower-case file extensions (without the leading dot) handled here;
    # ``supports`` checks membership in this set.
    supported_extensions: ClassVar = {"txt"}

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this generator supports the given file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if the file's extension (case-insensitive, without the dot)
            is listed in ``supported_extensions``
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        # Consult the class attribute instead of a hard-coded "txt" so the
        # declared extensions and the actual check cannot drift apart.
        return extension in self.supported_extensions

    def generate(self, context: ExtractionContext, output_path: Path) -> bool:
        """
        Generate a thumbnail preview from a text file.

        Parameters
        ----------
        context
            The extraction context containing file information
        output_path
            Path where the preview image should be saved

        Returns
        -------
        bool
            True if preview was successfully generated, False otherwise
        """
        try:
            _logger.debug("Generating text preview for: %s", context.file_path)

            # Generate the thumbnail using the local function
            result = text_to_thumbnail(
                context.file_path,
                output_path,
                output_size=500,
            )

            # ``text_to_thumbnail`` reports failure by returning False. Check
            # it explicitly so a stale preview left at ``output_path`` by an
            # earlier run is not mistaken for success.
            if result is False:
                return False

            return output_path.exists()
        except Exception as e:
            _logger.warning(
                "Failed to generate text preview for %s: %s",
                context.file_path,
                e,
            )
            return False

Coverage for nexusLIMS/extractors/plugins/preview_generators/text_preview.py: 100%

86 statements