Coverage for fpdf2_textindex / interface.py: 82.20%

192 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-24 15:45 +0000

1"""Interface.""" 

2 

3from __future__ import annotations 

4 

5import abc 

6from collections.abc import Iterable, Iterator 

7import dataclasses 

8import enum 

9from typing import Any, ClassVar 

10 

11from typing_extensions import Self 

12 

13from fpdf2_textindex import constants as const 

14from fpdf2_textindex.constants import LOGGER 

15from fpdf2_textindex.md_emphasis import MDEmphasis 

16from fpdf2_textindex.utils import join_label_path 

17 

18 

class _LabelPathABC(abc.ABC):
    """Abstract base for classes that expose a `label_path`.

    Subclasses must provide `label_path`; `joined_label_path` is derived from
    it.
    """

    @property
    @abc.abstractmethod
    def label_path(self) -> tuple[str, ...]:
        """The label path as a tuple of labels."""

    @property
    def joined_label_path(self) -> str:
        """The label path passed through `join_label_path`."""
        path = self.label_path
        return join_label_path(path)

32 

33 

@dataclasses.dataclass(frozen=True, kw_only=True, slots=True)
class Alias(_LabelPathABC):
    """Frozen pairing of an alias name with a label path."""

    name: str
    """The alias name."""

    label_path: tuple[str, ...]
    """The label path stored for the alias."""

    def __repr__(self) -> str:
        cls_name = type(self).__name__
        target = self.joined_label_path
        return f"{cls_name}(#{self.name:s} -> {target!r:s})"

49 

50 

51@dataclasses.dataclass(kw_only=True, slots=True) 

52class LinkLocation: 

53 """Link Location.""" 

54 

55 page: int 

56 """The page the link is referened/used on.""" 

57 

58 x: float 

59 """The `x`-position on the page.""" 

60 

61 y: float 

62 """The `y`-position on the page.""" 

63 

64 w: float 

65 """The width the link has on the page.""" 

66 

67 h: float 

68 """The height the link has on the page.""" 

69 

70 

@dataclasses.dataclass(kw_only=True, slots=True)
class CrossReference(_LabelPathABC):
    """Cross reference of a given type pointing to a label path."""

    id: int
    """The id of the cross reference."""

    type: CrossReferenceType
    """The cross reference type."""

    label_path: tuple[str, ...]
    """The label path this cross reference points to."""

    location: LinkLocation | None = dataclasses.field(default=None, init=False)
    """The (link) location of the cross reference in the document; not an
    init argument and `None` until assigned."""

    def __str__(self) -> str:
        kind = self.type.capitalize()
        target = self.joined_label_path
        return f"{kind:s} {target:s}"

    def __repr__(self) -> str:
        text = str(self)
        return f"{type(self).__name__}('{text:s}')"

    @property
    def link(self) -> str:
        """The link name of the cross reference: the index id prefix followed
        by the id.
        """
        prefix = const.INDEX_ID_PREFIX
        return f"{prefix:s}{self.id:d}"

99 

100 

class CrossReferenceType(str, enum.Enum):
    """Cross Reference Type.

    Besides the exact member values, construction accepts `None` (mapped to
    `NONE`) and any string that matches a member value or member name
    case-insensitively, e.g. `"SEE"`, `"See Also"` or `"also"`.
    """

    NONE = "none"
    """No cross reference."""

    SEE = "see"
    """SEE-cross reference."""

    ALSO = "see also"
    """SEE ALSO-cross reference."""

    def __str__(self) -> str:
        return self.value

    @classmethod
    def _missing_(cls, value: Any) -> Self | None:  # noqa: ANN401
        """Resolves values that are not an exact member value.

        Args:
            value: The value that could not be found by exact lookup.

        Returns:
            The matching member, or `None` if there is none (the enum
            machinery then raises `ValueError`).
        """
        if value is None:
            return cls.NONE
        if isinstance(value, str):
            # Compare against the members directly instead of calling
            # `cls(...)` again: a second failed lookup would re-enter
            # `_missing_` and recurse without end.
            wanted = value.lower()
            for member in cls:
                if wanted in (member.value, member.name.lower()):
                    return member
        return None

123 

124 

@dataclasses.dataclass(kw_only=True, repr=False, slots=True)
class Node(_LabelPathABC):
    """Tree node identified by a label, with an optional parent and children.

    Every instance takes its `id` from a class-level counter. A node created
    with a `parent` registers itself as a child of that parent.
    """

    _next_id: ClassVar[int] = 0

    id: int = dataclasses.field(init=False)
    """The id (assigned on creation from the class counter)."""

    label: str
    """The label."""

    parent: Self | None = None
    """The parent, or `None` for a node without parent (root)."""

    _children: list[Self] = dataclasses.field(default_factory=list, init=False)

    def __post_init__(self) -> None:
        self.id = type(self)._next_id
        type(self)._next_id += 1

        if self.parent is not None:
            self.parent.add_child(self)  # type: ignore[arg-type]

    def __bool__(self) -> bool:
        return True

    def __iter__(self) -> Iterator[Self]:
        yield self
        yield from self.iter_children()

    def __hash__(self) -> int:
        return hash((self.id, self.label))

    def __repr__(self) -> str:
        n_children = len(self._children)
        kw: dict[str, int | str] = {
            "id": self.id,
            "label": repr(self.label),
            "depth": self.depth,
            "label_path": repr(self.joined_label_path),
            # Singular only for exactly one child ("0 children", "1 child").
            "children": (
                f"[{n_children:d} child{'ren' if n_children != 1 else '':s}]"
            ),
        }
        kw_str = ", ".join(f"{k:s}: {v!s:s}" for k, v in kw.items())
        return f"{type(self).__name__:s}({kw_str:s})"

    def __str__(self) -> str:
        return self.label or ""

    @property
    def children(self) -> list[Self]:
        """The children, sorted by label (a new list on every access)."""
        return sorted(self._children, key=lambda c: c.label)

    @property
    def depth(self) -> int:
        """The depth.

        Possible values are:
            - (invisible) root: 0,
            - entries: 1,
            - subentries: 2,
            - sub-subentries: 3.

        Deeper entries are not recommended.
        """
        # `iter_parents` never yields the root, so a root and its direct
        # children would otherwise both end up with depth 1.
        if self.parent is None:
            return 0
        return sum(1 for _ in self.iter_parents()) + 1

    @property
    def label_path(self) -> tuple[str, ...]:
        """The labels from the topmost non-root ancestor down to this node."""
        return tuple(
            reversed([self.label, *(p.label for p in self.iter_parents())])
        )

    def add_child(self, child: Self) -> None:
        """Adds a child and sets its `parent` to this node.

        Args:
            child: The child to add.

        Raises:
            ValueError: If there is already a child with the same label.
        """
        if self.get_child(child.label) is not None:
            msg = "cannot add second child with same label"
            raise ValueError(msg)
        child.parent = self
        self._children.append(child)

    def get_child(self, label: str) -> Self | None:
        """Returns a child by its label or `None` if not existing.

        Args:
            label: The label to search by.

        Returns:
            The child with the label or `None` if not existing.
        """
        # Labels are unique among siblings (enforced by `add_child`), so the
        # unsorted list yields the same result without sorting per lookup.
        for child in self._children:
            if child.label == label:
                return child
        return None

    def iter_children(self) -> Iterator[Self]:
        """Iterates over the children (going down).

        Yields:
            The first child, its grandchildren, great-grandchildren, ..., then
            the second child, its grandchildren, great-grandchildren, ..., and
            so forth.
        """
        for child in self.children:
            yield from child  # type: ignore[misc]

    def iter_parents(self) -> Iterator[Self]:
        """Iterates over the parents without the root (going up).

        Yields:
            The parent, grandparent, great-grandparent, ..., and so forth,
            stopping before root.
        """
        # Do not yield root
        if self.parent is None:
            return
        par = self.parent
        while par.parent is not None:
            yield par
            par = par.parent

255 

256 

257@dataclasses.dataclass(kw_only=True, slots=True) 

258class Reference: 

259 """Reference.""" 

260 

261 start_id: int 

262 """The start id of the reference.""" 

263 

264 start_suffix: str | None = None 

265 """The start suffix of the reference or `None`.""" 

266 

267 start_location: LinkLocation | None = dataclasses.field( 

268 default=None, init=False 

269 ) 

270 """The start (link) location in the document the reference is set at.""" 

271 

272 end_id: int | None = dataclasses.field(default=None, init=False) 

273 """The end id of the reference or `None`.""" 

274 

275 end_suffix: str | None = dataclasses.field(default=None, init=False) 

276 """The end suffix of the reference or `None`.""" 

277 

278 end_location: LinkLocation | None = dataclasses.field( 

279 default=None, init=False 

280 ) 

281 """The end (link) location in the document the reference is set at.""" 

282 

283 locator_emphasis: bool = False 

284 """Whether to emphasize the locator (page number) of the reference in the 

285 text index (`True`) or not (`False`).""" 

286 

287 @property 

288 def start_link(self) -> str: 

289 """The start link in the document that must be set in the text index to 

290 lead from the text to the text index. 

291 """ 

292 return f"{const.INDEX_ID_PREFIX:s}{self.start_id:d}" 

293 

294 @property 

295 def end_link(self) -> str | None: 

296 """The end link in the document that must be set in the text index to 

297 lead from the text to the text index. In case of no end id, the end link 

298 will be `None`. 

299 """ 

300 if self.end_id is None: 

301 return None 

302 return f"{const.INDEX_ID_PREFIX:s}{self.end_id:d}" 

303 

304 

@dataclasses.dataclass(kw_only=True, repr=False, slots=True)
class TextIndexEntry(Node):
    """Text Index Entry.

    A node that additionally collects references (locators) and cross
    references and can carry a sort key.
    """

    references: list[Reference] = dataclasses.field(
        default_factory=list, init=False
    )
    """The references."""

    cross_references: list[CrossReference] = dataclasses.field(
        default_factory=list, init=False
    )
    """The cross references."""

    sort_key: str | None = dataclasses.field(default=None, init=False)
    """The sort key."""

    def __hash__(self) -> int:
        # Declared again on this class: `dataclass` with the default `eq=True`
        # sets `__hash__` to `None` on a class that does not define it itself.
        return hash((self.id, self.label))

    @property
    def children(self) -> list[TextIndexEntry]:
        """The child entries, sorted by their `sort_label`."""
        return sorted(self._children, key=lambda c: c.sort_label)

    @property
    def sort_label(self) -> str:
        """The sort label of the entry.

        The label with markdown emphasis removed (or "\\uffff" for an entry
        without label), prefixed by the sort key if set, in lower case.
        """
        label = MDEmphasis.remove(self.label) if self.label else "\uffff"
        if self.sort_key:
            label = self.sort_key + label
        return label.lower()

    def add_cross_reference(
        self,
        id: int,
        cross_ref_type: CrossReferenceType,
        label_path: Iterable[str],
        *,
        strict: bool = True,
    ) -> None:
        """Adds a cross reference to the entry.

        A cross reference with the same type and label path as an existing
        one is not added again.

        Args:
            id: The id of the cross reference.
            cross_ref_type: The type of the cross reference.
            label_path: The label path of the cross reference.
            strict: Whether to raise a `ValueError` if adding a SEE-cross
                reference to an entry with former "normal" reference (locator).
                Else, it will just be a warning and the SEE-cross reference will
                be automatically converted to SEE ALSO. Defaults to `True`.

        Raises:
            ValueError: If `strict=True` and adding a SEE-cross reference to
                an entry with former "normal" reference (locator).
        """
        if self.references and cross_ref_type == CrossReferenceType.SEE:
            if strict:
                msg = (
                    f"cannot add a SEE-cross reference to entry "
                    f"{self.joined_label_path!r} with former reference "
                    f"(locator)"
                )
                raise ValueError(msg)
            LOGGER.warning(
                "Adding a SEE-cross reference to entry %r with former "
                "reference (locator); cross reference will be converted to SEE "
                "ALSO",
                self.joined_label_path,
            )
            cross_ref_type = CrossReferenceType.ALSO
        label_path = tuple(label_path)
        # Skip duplicates (same type and same target).
        if any(
            cr.type == cross_ref_type and cr.label_path == label_path
            for cr in self.cross_references
        ):
            return
        self.cross_references.append(
            CrossReference(id=id, type=cross_ref_type, label_path=label_path)
        )

    def add_reference(
        self,
        start_id: int,
        *,
        locator_emphasis: bool = False,
        start_suffix: str | None = None,
        strict: bool = True,
    ) -> None:
        """Adds a reference (locator) to the entry.

        Args:
            start_id: The start id of the reference.
            locator_emphasis: Whether to emphasize the locator of the reference.
                Defaults to `False`.
            start_suffix: The start suffix of the reference. Defaults to
                `None`.
            strict: Whether to raise a `ValueError` if adding a reference
                (locator) to an entry with former SEE-cross reference. Else,
                it will just be a warning and the former SEE-cross
                reference(s) will be automatically converted to SEE ALSO.
                Defaults to `True`.

        Raises:
            ValueError: If `strict=True` and adding a reference locator to an
                entry with former SEE-cross reference.
        """
        if any(
            cr.type == CrossReferenceType.SEE for cr in self.cross_references
        ):
            if strict:
                msg = (
                    f"cannot add a reference (locator) to entry "
                    f"{self.joined_label_path!r} with former SEE-cross "
                    f"reference"
                )
                raise ValueError(msg)
            LOGGER.warning(
                "Adding a reference (locator) to entry %r with former SEE-"
                "cross reference(s); cross reference(s) will be converted to "
                "SEE ALSO",
                self.joined_label_path,
            )
            for cr in self.cross_references:
                if cr.type == CrossReferenceType.SEE:
                    cr.type = CrossReferenceType.ALSO

        self.references.append(
            Reference(
                start_id=start_id,
                start_suffix=start_suffix,
                locator_emphasis=locator_emphasis,
            )
        )

    def update_latest_reference_end(
        self,
        end_id: int,
        end_suffix: str | None = None,
    ) -> None:
        """Updates the end of the latest reference.

        Args:
            end_id: The end id of the latest reference.
            end_suffix: The end suffix of the latest reference. Only applied
                if non-empty. Defaults to `None`.

        Raises:
            RuntimeError: If there has been no reference before.
        """
        if not self.references:
            msg = "cannot update latest reference end without reference"
            raise RuntimeError(msg)
        latest = self.references[-1]
        latest.end_id = end_id
        if end_suffix:
            latest.end_suffix = end_suffix