Coverage for nexusLIMS/extractors/plugins/preview_generators/text_preview.py: 100%

86 statements  

« prev     ^ index     » next       coverage.py v7.11.3, created at 2026-03-24 05:23 +0000

1"""Text file preview generator.""" 

2 

3import logging 

4import textwrap 

5from pathlib import Path 

6from typing import ClassVar, Union 

7 

8import matplotlib.pyplot as plt 

9from matplotlib.figure import Figure 

10from PIL import Image 

11 

12from nexusLIMS.extractors.base import ExtractionContext 

13 

14_logger = logging.getLogger(__name__) 

15 

16_LANCZOS = Image.Resampling.LANCZOS 

17 

# Layout limits for the rendered text preview.

# Widest line (in characters) drawn on the preview
_MAX_COLS = 44

# Row budget when the file is rendered as a free-form note
_MAX_ROWS_NOTE = 18

# Row budget when the file is rendered as columnar data
_MAX_ROWS_DATA = 17

# Edge length of the square preview figure, in inches
_DEFAULT_SIZE = 5

23 

24 

def _pad_to_square(im_path: Path, new_width: int = 500):
    """
    Pad an image to square.

    Helper method to pad an image saved on disk to a square with size
    ``new_width x new_width``. The image is first scaled (preserving its
    aspect ratio) so that its longest side equals ``new_width``, then centered
    on an empty RGBA canvas. The original image file is overwritten.

    Method adapted from:
    https://jdhao.github.io/2017/11/06/resize-image-to-square-with-padding/

    Parameters
    ----------
    im_path
        The path to the image that should be resized/padded
    new_width
        Desired output width/height of the image (in pixels)
    """
    # Use a context manager so the source file handle is always released,
    # even if the resize step raises.
    with Image.open(im_path) as image:
        old_size = image.size  # (width, height)
        ratio = float(new_width) / max(old_size)
        # Clamp each side to at least 1 px: for an extreme aspect ratio the
        # short side would otherwise truncate to 0, which resize() rejects.
        new_size = tuple(max(1, int(x * ratio)) for x in old_size)
        resized = image.resize(new_size, _LANCZOS)

    # NOTE(review): the canvas is RGBA, so saving to a format without alpha
    # support (e.g. JPEG) will fail -- confirm callers only pass paths whose
    # format can store an alpha channel (such as PNG).
    new_im = Image.new("RGBA", (new_width, new_width))
    # Center the scaled image on the square canvas
    new_im.paste(
        resized,
        ((new_width - new_size[0]) // 2, (new_width - new_size[1]) // 2),
    )
    new_im.save(im_path)

56 

57 

def _decode_text_bytes(raw_bytes: bytes, f: Path) -> Union[str, None]:
    """Decode *raw_bytes* with the first encoding that succeeds, else None."""
    # Encodings are tried in order of preference
    for encoding in ("utf-8", "windows-1250", "windows-1252"):
        try:
            content = raw_bytes.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            continue
        _logger.debug("Successfully decoded %s with %s encoding", f, encoding)
        return content
    return None


def _format_preview_text(content: str) -> str:
    """Trim/wrap *content* to the preview's row and column budget."""
    # Normalize line endings (CRLF / CR to LF) for consistent handling
    content = content.replace("\r\n", "\n").replace("\r", "\n")

    # Expand tabs to spaces (tabs can render as black squares in matplotlib)
    content = content.expandtabs(tabsize=4)

    source_lines = content.split("\n")

    # Many newlines -> treat as data; few newlines -> treat as a note.
    # _MAX_ROWS_NOTE is the threshold since notes are shown in that many rows.
    if content.count("\n") > _MAX_ROWS_NOTE:
        # Data mode: first _MAX_COLS characters of first _MAX_ROWS_DATA lines
        rows = [line[:_MAX_COLS] for line in source_lines[:_MAX_ROWS_DATA]]
    else:
        # Note mode: wrap to _MAX_COLS columns, keep first _MAX_ROWS_NOTE rows
        wrapper = textwrap.TextWrapper(width=_MAX_COLS)
        rows = []
        for line in source_lines:
            if line.strip():
                rows.extend(wrapper.wrap(line))
            else:
                # Preserve empty lines so paragraph breaks stay visible
                rows.append("")
        rows = rows[:_MAX_ROWS_NOTE]

    # Escape "$" so matplotlib never treats a line as mathtext. Backslashes
    # are deliberately NOT doubled: outside math mode matplotlib only
    # un-escapes "\$" and draws every other backslash literally, so doubling
    # them would render each backslash (e.g. in Windows paths) twice.
    return "\n".join(rows).replace("$", r"\$")


def text_to_thumbnail(
    f: Path,
    out_path: Path,
    output_size: int = 500,
) -> Union[Figure, bool]:
    """
    Generate a preview thumbnail from a text file.

    The contents of the text file are formatted and drawn in a monospace font
    on a square figure (``_DEFAULT_SIZE`` inches per side) that is saved to
    ``out_path`` and padded to ``output_size x output_size`` pixels.

    If the text file has more than ``_MAX_ROWS_NOTE`` (18) newlines, it is
    probably data, and the first ``_MAX_COLS`` (44) characters of each of the
    first ``_MAX_ROWS_DATA`` (17) lines will be written to the image.

    Otherwise it is probably a manually generated note, and the text will be
    wrapped into a 44 column, 18 row box until the space is exhausted.

    Note that this function closes all open pyplot figures and sets the
    ``image.cmap`` rcParam to ``"gray"`` as a side effect.

    Parameters
    ----------
    f
        The path of a text file for which a thumbnail should be generated.
    out_path
        A path to the desired thumbnail filename. All formats supported by
        :py:meth:`~matplotlib.figure.Figure.savefig` can be used.
    output_size : int
        The pixel width (and height, since the image is padded to square) of
        the saved image file.

    Returns
    -------
    f : :py:class:`matplotlib.figure.Figure` or bool
        Handle to a (already closed) matplotlib Figure, or the value False if
        the file could not be read or decoded, or the image could not be saved
    """
    plt.close("all")
    plt.rcParams["image.cmap"] = "gray"

    try:
        raw_bytes = f.read_bytes()
    except Exception as e:
        _logger.warning("Failed to read text file %s: %s", f, e)
        return False

    content = _decode_text_bytes(raw_bytes, f)
    if content is None:
        _logger.warning(
            "Failed to decode text file %s with any supported encoding", f
        )
        return False

    formatted_text = _format_preview_text(content)

    # Create a matplotlib figure with no frame; the dpi is chosen so that the
    # figure is output_size pixels on each side
    dpi = output_size / _DEFAULT_SIZE
    fig = plt.figure(
        figsize=(_DEFAULT_SIZE, _DEFAULT_SIZE),
        dpi=dpi,
    )

    plt.axis("off")

    # Add the text to the figure, monospace and anchored at the top left.
    # DejaVu Sans Mono is used for better Unicode support than the generic
    # monospace family.
    fig.text(
        0.02,
        0.97,
        formatted_text,
        fontfamily="DejaVu Sans Mono",
        fontsize=12,
        verticalalignment="top",
        horizontalalignment="left",
        usetex=False,
        linespacing=1.7,  # Increase line spacing (default is 1.2)
    )

    fig.tight_layout()

    # Save the figure, then pad the saved file to an exact square
    try:
        fig.savefig(out_path, dpi=dpi)
        _pad_to_square(out_path, output_size)
    except Exception as e:
        _logger.warning("Failed to save text thumbnail to %s: %s", out_path, e)
        return False
    finally:
        # Release the figure on both the success and the failure path
        plt.close(fig)

    return fig

195 

196 

class TextPreviewGenerator:
    """
    Preview generator for text files.

    This generator creates thumbnail previews of text files by rendering
    the first few lines of text as an image.
    """

    # Identifier and ordering value for this generator
    # NOTE(review): presumably consumed by the plugin registry -- confirm
    name = "text_preview"
    priority = 100
    # File extensions (lower case, without the leading dot) this generator handles
    supported_extensions: ClassVar = {"txt"}

    def supports(self, context: ExtractionContext) -> bool:
        """
        Check if this generator supports the given file.

        Parameters
        ----------
        context
            The extraction context containing file information

        Returns
        -------
        bool
            True if the (case-insensitive) file extension is listed in
            ``supported_extensions``
        """
        extension = context.file_path.suffix.lower().lstrip(".")
        # Consult the declared extension set rather than repeating the literal
        return extension in self.supported_extensions

    def generate(self, context: ExtractionContext, output_path: Path) -> bool:
        """
        Generate a thumbnail preview from a text file.

        Parameters
        ----------
        context
            The extraction context containing file information
        output_path
            Path where the preview image should be saved

        Returns
        -------
        bool
            True if preview was successfully generated, False otherwise
        """
        try:
            _logger.debug("Generating text preview for: %s", context.file_path)

            # Generate the thumbnail using the local function
            result = text_to_thumbnail(
                context.file_path,
                output_path,
                output_size=500,
            )

            # text_to_thumbnail signals failure by returning False; honor it
            # so a stale file already present at output_path is not reported
            # as a freshly generated preview.
            return result is not False and output_path.exists()
        except Exception as e:
            _logger.warning(
                "Failed to generate text preview for %s: %s",
                context.file_path,
                e,
            )
            return False