Coverage for fpdf2_textindex / interface.py: 82.20%
192 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 15:45 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-24 15:45 +0000
1"""Interface."""
3from __future__ import annotations
5import abc
6from collections.abc import Iterable, Iterator
7import dataclasses
8import enum
9from typing import Any, ClassVar
11from typing_extensions import Self
13from fpdf2_textindex import constants as const
14from fpdf2_textindex.constants import LOGGER
15from fpdf2_textindex.md_emphasis import MDEmphasis
16from fpdf2_textindex.utils import join_label_path
class _LabelPathABC(abc.ABC):
    """Abstract base for classes that expose a `label_path`.

    Subclasses supply `label_path`; `joined_label_path` is derived from it.
    """

    @property
    @abc.abstractmethod
    def label_path(self) -> tuple[str, ...]:
        """The label path as a tuple of labels."""

    @property
    def joined_label_path(self) -> str:
        """The label path passed through `join_label_path`."""
        path = self.label_path
        return join_label_path(path)
@dataclasses.dataclass(frozen=True, kw_only=True, slots=True)
class Alias(_LabelPathABC):
    """Alias: an immutable pairing of a name with a label path."""

    name: str
    """The alias name."""

    label_path: tuple[str, ...]
    """The label path stored for the alias."""

    def __repr__(self) -> str:
        cls_name = type(self).__name__
        target = self.joined_label_path
        return f"{cls_name}(#{self.name:s} -> {target!r:s})"
51@dataclasses.dataclass(kw_only=True, slots=True)
52class LinkLocation:
53 """Link Location."""
55 page: int
56 """The page the link is referened/used on."""
58 x: float
59 """The `x`-position on the page."""
61 y: float
62 """The `y`-position on the page."""
64 w: float
65 """The width the link has on the page."""
67 h: float
68 """The height the link has on the page."""
@dataclasses.dataclass(kw_only=True, slots=True)
class CrossReference(_LabelPathABC):
    """Cross Reference: a typed pointer to another label path."""

    id: int
    """The id of the cross reference."""

    type: CrossReferenceType
    """The type of the cross reference."""

    label_path: tuple[str, ...]
    """The label path the cross reference points to."""

    location: LinkLocation | None = dataclasses.field(default=None, init=False)
    """The (link) location in the document the cross reference is set at.

    Not an `__init__` argument; starts as `None`.
    """

    def __str__(self) -> str:
        # `type` is a `str`-based enum, so `capitalize` is the plain str method.
        kind = self.type.capitalize()
        target = self.joined_label_path
        return f"{kind:s} {target:s}"

    def __repr__(self) -> str:
        text = str(self)
        return f"{type(self).__name__}('{text:s}')"

    @property
    def link(self) -> str:
        """The link name: `const.INDEX_ID_PREFIX` followed by the id."""
        prefix = const.INDEX_ID_PREFIX
        return f"{prefix:s}{self.id:d}"
class CrossReferenceType(str, enum.Enum):
    """Cross Reference Type.

    Besides the exact values, lookup by value accepts `None` (mapped to
    `NONE`) and strings that match a member's value or name
    case-insensitively, e.g. `CrossReferenceType("SEE")` or
    `CrossReferenceType("Also")`.
    """

    NONE = "none"
    """No cross reference."""

    SEE = "see"
    """SEE-cross reference."""

    ALSO = "see also"
    """SEE ALSO-cross reference."""

    def __str__(self) -> str:
        return self.value

    @classmethod
    def _missing_(cls, value: Any) -> Self | None:  # noqa: ANN401
        """Resolves values that are not an exact member value.

        Args:
            value: The value that failed the exact lookup.

        Returns:
            The matching member, or `None` to let `Enum` raise its regular
            `ValueError`.
        """
        if value is None:
            return cls.NONE
        if isinstance(value, str):
            # Compare against the members directly. Calling `cls(...)` again
            # with a value that still does not match would re-enter
            # `_missing_` and recurse without end.
            wanted = value.strip().lower()
            for member in cls:
                if wanted in (member.value, member.name.lower()):
                    return member
        return None
@dataclasses.dataclass(kw_only=True, repr=False, slots=True)
class Node(_LabelPathABC):
    """Node of a label tree.

    On construction a node takes its id from a class-level counter and, if a
    parent is given, registers itself as a child of that parent.
    """

    _next_id: ClassVar[int] = 0

    id: int = dataclasses.field(init=False)
    """The id."""

    label: str
    """The label."""

    parent: Self | None = None
    """The parent."""

    _children: list[Self] = dataclasses.field(default_factory=list, init=False)

    def __post_init__(self) -> None:
        self.id = type(self)._next_id
        type(self)._next_id += 1

        if self.parent is not None:
            self.parent.add_child(self)  # type: ignore[arg-type]

    def __bool__(self) -> bool:
        return True

    def __iter__(self) -> Iterator[Self]:
        # Pre-order: the node itself first, then all of its descendants.
        yield self
        yield from self.iter_children()

    def __hash__(self) -> int:
        return hash((self.id, self.label))

    def __repr__(self) -> str:
        kw: dict[str, int | str] = {}
        kw["id"] = self.id
        kw["label"] = repr(self.label)
        kw["depth"] = self.depth
        kw["label_path"] = repr(self.joined_label_path)
        n_children = len(self._children)
        kw["children"] = (
            f"[{n_children:d} child{'ren' if n_children > 1 else '':s}]"
        )
        kw_str = ", ".join(f"{k:s}: {v!s:s}" for k, v in kw.items())
        return f"{type(self).__name__:s}({kw_str:s})"

    def __str__(self) -> str:
        return self.label or ""

    @property
    def children(self) -> list[Self]:
        """The children, sorted by label (a new list on every access)."""
        return sorted(self._children, key=lambda c: c.label)

    @property
    def depth(self) -> int:
        """The depth.

        Possible values are:
            - (invisible) root: 0,
            - entries: 1,
            - subentries: 2,
            - sub-subentries: 3.

        Deeper entries are not recommended.
        """
        # `iter_parents` never yields the root, so a root and a top-level
        # entry both see zero parents; tell them apart explicitly.
        if self.parent is None:
            return 0
        return sum(1 for _ in self.iter_parents()) + 1

    @property
    def label_path(self) -> tuple[str, ...]:
        """The label path: labels from the outermost non-root ancestor down
        to this node.
        """
        return tuple(
            reversed([self.label, *(p.label for p in self.iter_parents())])
        )

    def add_child(self, child: Self) -> None:
        """Adds a child and makes this node its parent.

        Args:
            child: The child to add.

        Raises:
            ValueError: If there is already a child with the same label.
        """
        if self.get_child(child.label) is not None:
            msg = "cannot add second child with same label"
            raise ValueError(msg)
        child.parent = self
        self._children.append(child)

    def get_child(self, label: str) -> Self | None:
        """Returns a child by its label or `None` if not existing.

        Args:
            label: The label to search by.

        Returns:
            The child with the label or `None` if not existing.
        """
        # Scan the raw child list; sorting is not needed for a lookup.
        for child in self._children:
            if child.label == label:
                return child
        return None

    def iter_children(self) -> Iterator[Self]:
        """Iterates over the children (going down).

        Yields:
            The first child, its grandchildren, great-grandchildren, ..., then
            the second child, its grandchildren, great-grandchildren, ..., and
            so forth.
        """
        for child in self.children:
            yield from iter(child)  # type: ignore[misc]

    def iter_parents(self) -> Iterator[Self]:
        """Iterates over the parents without the root (going up).

        Yields:
            The parent, grandparent, great-grandparent, ..., and so forth,
            stopping before root.
        """
        # Do not yield root
        if self.parent is None:
            return
        par = self.parent
        while par.parent is not None:
            yield par
            par = par.parent
257@dataclasses.dataclass(kw_only=True, slots=True)
258class Reference:
259 """Reference."""
261 start_id: int
262 """The start id of the reference."""
264 start_suffix: str | None = None
265 """The start suffix of the reference or `None`."""
267 start_location: LinkLocation | None = dataclasses.field(
268 default=None, init=False
269 )
270 """The start (link) location in the document the reference is set at."""
272 end_id: int | None = dataclasses.field(default=None, init=False)
273 """The end id of the reference or `None`."""
275 end_suffix: str | None = dataclasses.field(default=None, init=False)
276 """The end suffix of the reference or `None`."""
278 end_location: LinkLocation | None = dataclasses.field(
279 default=None, init=False
280 )
281 """The end (link) location in the document the reference is set at."""
283 locator_emphasis: bool = False
284 """Whether to emphasize the locator (page number) of the reference in the
285 text index (`True`) or not (`False`)."""
287 @property
288 def start_link(self) -> str:
289 """The start link in the document that must be set in the text index to
290 lead from the text to the text index.
291 """
292 return f"{const.INDEX_ID_PREFIX:s}{self.start_id:d}"
294 @property
295 def end_link(self) -> str | None:
296 """The end link in the document that must be set in the text index to
297 lead from the text to the text index. In case of no end id, the end link
298 will be `None`.
299 """
300 if self.end_id is None:
301 return None
302 return f"{const.INDEX_ID_PREFIX:s}{self.end_id:d}"
@dataclasses.dataclass(kw_only=True, repr=False, slots=True)
class TextIndexEntry(Node):
    """Text Index Entry: a node carrying references and cross references."""

    references: list[Reference] = dataclasses.field(
        default_factory=list, init=False
    )
    """The references."""

    cross_references: list[CrossReference] = dataclasses.field(
        default_factory=list, init=False
    )
    """The cross references."""

    sort_key: str | None = dataclasses.field(default=None, init=False)
    """The sort key."""

    def __hash__(self) -> int:
        # Restated on purpose: `dataclass` sets `__hash__` to `None` on a
        # class with `eq=True` that does not define `__hash__` itself.
        return hash((self.id, self.label))

    @property
    def children(self) -> list[TextIndexEntry]:
        """The child entries, sorted by their `sort_label`."""
        return sorted(self._children, key=lambda c: c.sort_label)

    @property
    def sort_label(self) -> str:
        """The sort label of the entry.

        The label with emphasis removed via `MDEmphasis.remove`, prefixed by
        `sort_key` if set, in lower case.
        """
        # "\uffff" stands in for an empty label - presumably so that such
        # entries sort after regular labels.
        label = MDEmphasis.remove(self.label) if self.label else "\uffff"
        if self.sort_key:
            label = self.sort_key + label
        return label.lower()

    def add_cross_reference(
        self,
        id: int,
        cross_ref_type: CrossReferenceType,
        label_path: Iterable[str],
        *,
        strict: bool = True,
    ) -> None:
        """Adds a cross reference to the entry.

        A cross reference with the same type and label path as an existing
        one is not added again.

        Args:
            id: The id of the cross reference.
            cross_ref_type: The type of the cross reference.
            label_path: The label path of the cross reference.
            strict: Whether to raise a `ValueError` if adding a SEE-cross
                reference to an entry with former "normal" reference (locator).
                Else, it will just be a warning and the SEE-cross reference will
                be automatically converted to SEE ALSO. Defaults to `True`.

        Raises:
            ValueError: If `strict=True` and adding a SEE-cross reference to
                an entry with former "normal" reference (locator).
        """
        if self.references and cross_ref_type == CrossReferenceType.SEE:
            if strict:
                msg = (
                    f"cannot add a SEE-cross reference to entry "
                    f"{self.joined_label_path!r} with former reference "
                    f"(locator)"
                )
                raise ValueError(msg)
            LOGGER.warning(
                "Adding a SEE-cross reference to entry %r with former "
                "reference (locator); cross reference will be converted to SEE "
                "ALSO",
                self.joined_label_path,
            )
            cross_ref_type = CrossReferenceType.ALSO
        label_path = tuple(label_path)
        already_present = any(
            cr.type == cross_ref_type and cr.label_path == label_path
            for cr in self.cross_references
        )
        if already_present:
            return
        self.cross_references.append(
            CrossReference(id=id, type=cross_ref_type, label_path=label_path)
        )

    def add_reference(
        self,
        start_id: int,
        *,
        locator_emphasis: bool = False,
        start_suffix: str | None = None,
        strict: bool = True,
    ) -> None:
        """Adds a reference (locator) to the entry.

        Args:
            start_id: The start id of the reference.
            locator_emphasis: Whether to emphasize the locator of the reference.
                Defaults to `False`.
            start_suffix: The start suffix of the reference. Defaults to
                `None`.
            strict: Whether to raise a `ValueError` if adding a reference
                (locator) to an entry with former SEE-cross reference. Else,
                it will just be a warning and the existing SEE-cross
                reference(s) will be automatically converted to SEE ALSO.
                Defaults to `True`.

        Raises:
            ValueError: If `strict=True` and adding a reference locator to an
                entry with former SEE-cross reference.
        """
        if any(
            cr.type == CrossReferenceType.SEE for cr in self.cross_references
        ):
            if strict:
                msg = (
                    f"cannot add a reference (locator) to entry "
                    f"{self.joined_label_path!r} with former SEE-cross "
                    f"reference"
                )
                raise ValueError(msg)
            LOGGER.warning(
                "Adding a reference (locator) to entry %r with former SEE-"
                "cross reference(s); cross reference(s) will be converted to "
                "SEE ALSO",
                self.joined_label_path,
            )
            for cr in self.cross_references:
                if cr.type == CrossReferenceType.SEE:
                    cr.type = CrossReferenceType.ALSO

        self.references.append(
            Reference(
                start_id=start_id,
                start_suffix=start_suffix,
                locator_emphasis=locator_emphasis,
            )
        )

    def update_latest_reference_end(
        self,
        end_id: int,
        end_suffix: str | None = None,
    ) -> None:
        """Updates the end of the latest reference.

        Args:
            end_id: The end id of the latest reference.
            end_suffix: The end suffix of the latest reference. Only applied
                if truthy; an existing end suffix is kept otherwise. Defaults
                to `None`.

        Raises:
            RuntimeError: If there has been no reference before.
        """
        if not self.references:
            msg = "cannot update latest reference end without reference"
            raise RuntimeError(msg)
        latest = self.references[-1]
        latest.end_id = end_id
        if end_suffix:
            latest.end_suffix = end_suffix