Coverage for fpdf2_textindex / alias.py: 66.33%

74 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-24 15:45 +0000

1"""Alias Registry.""" 

2 

3from collections.abc import Iterable, Iterator, Mapping 

4import logging 

5import re 

6from typing import Final, Literal 

7 

8from fpdf2_textindex.constants import LOGGER 

9from fpdf2_textindex.interface import Alias 

10 

11 

class AliasRegistry(Mapping[str, Alias]):
    """Alias Registry.

    Maps an alias by a name `"#alias"` to an entry by its label path.

    The registry is a read-only mapping from alias name to `Alias`; entries
    are added or overwritten through `define`. The lookups performed by this
    class use the alias name without the leading `#` prefix.
    """

    # Character that introduces an alias inside a directive.
    _ALIAS_PREFIX: Final[Literal["#"]] = "#"
    # An alias token anywhere in a directive: a prefix that is not directly
    # preceded by another prefix, followed by the alias name (group 1).
    _ALIAS_TOKEN_PATTERN: re.Pattern[str] = re.compile(
        rf"(?<!{_ALIAS_PREFIX:s}){_ALIAS_PREFIX:s}([a-zA-Z0-9\-_]+)"
    )
    # An alias definition at the end of a directive. Group 1 is the alias
    # name, including an optional second prefix character.
    _ALIAS_DEFINITION_PATTERN: re.Pattern[str] = re.compile(
        rf"{_ALIAS_PREFIX:s}({_ALIAS_PREFIX:s}?[a-zA-Z0-9\-_]+)$"
    )

    def __init__(self) -> None:
        """Creates an empty registry."""
        self._aliases: dict[str, Alias] = {}

    def __getitem__(self, name: str) -> Alias:
        """Returns the alias registered under `name`.

        Raises:
            KeyError: If no alias with that name has been defined.
        """
        return self._aliases[name]

    def __iter__(self) -> Iterator[str]:
        """Iterates over the names of the defined aliases."""
        return iter(self._aliases)

    def __len__(self) -> int:
        """Returns the number of defined aliases."""
        return len(self._aliases)

    def __repr__(self) -> str:
        return f"{type(self).__name__:s}({len(self):d} aliases)"

    def define(self, name: str, label_path: Iterable[str]) -> None:
        """Defines an alias.

        An existing alias of the same name is overwritten. Overwriting it
        with a different label path is logged as a warning; every other
        definition is logged at info level.

        Args:
            name: The name of the alias.
            label_path: The label path the alias will be replaced by.

        Raises:
            ValueError: If the label path is empty.
        """
        path = tuple(label_path)
        if not path:
            msg = f"cannot create alias {name!r:s} with empty label path"
            raise ValueError(msg)

        # Only a change of the stored label path counts as a redefinition;
        # defining the same alias with the same path again does not.
        existing = self._aliases.get(name)
        redefinition = existing is not None and existing.label_path != path

        self._aliases[name] = Alias(name=name, label_path=path)
        LOGGER.log(
            logging.WARNING if redefinition else logging.INFO,
            "\t%s alias '%s%s' as %r",
            "Redefined existing" if redefinition else "Defined new",
            self._ALIAS_PREFIX,
            name,
            self._aliases[name].joined_label_path,
        )

    def define_or_replace_from_label_path(
        self,
        label_path: list[str],
        label: str | None,
        content: str,
        alias_name: str | None,
        alias_start: int,
        directive_str: str,
    ) -> tuple[list[str], str | None, bool]:
        """Defines an alias from a directive or resolves an alias reference.

        Args:
            label_path: The label path to use for the definition.
            label: The label of the parsed directive.
            content: The content of the parsed directive.
            alias_name: The name of the alias.
            alias_start: The start index of the alias in the directive.
            directive_str: The original directive.

        Returns:
            The label path, the label, and whether it has been an unreferenced
            alias. The label path and the label can differ from the input in
            case the alias existed before.
        """
        unreferenced_alias = False
        if alias_name is None:
            # The directive carries no alias; hand the input back unchanged.
            return label_path, label, unreferenced_alias

        # A name that still starts with the prefix marks an unreferenced
        # alias; the registry works with the name without any prefix.
        if alias_name.startswith(self._ALIAS_PREFIX):
            unreferenced_alias = True
            alias_name = alias_name.lstrip(self._ALIAS_PREFIX)

        # Alias definition at end of an internally-specified label.
        # Trim alias portion from label, and define
        if alias_start > 0:
            assert label is not None
            self.define(alias_name, [*label_path, label])

        # Alias found at start of label:
        # Either an alias reference, or a definition without an internal label
        # (foo>#bar or just #bar)
        elif not label_path:
            # No path components. Could be an alias definition at root, or an
            # alias reference

            # Try to load the alias
            if alias_name in self._aliases:
                # Valid alias reference, load alias
                alias = self._aliases[alias_name]
                label_path = list(alias.label_path)
                assert label is None
                # The last component of the stored path becomes the label.
                label = label_path.pop()
                LOGGER.info(
                    "\tLoaded alias %r as %r for directive: %r",
                    alias_name,
                    alias.joined_label_path,
                    directive_str,
                )
            # No path components, and an alias reference to a non-existing
            # alias, define a new alias instead
            elif content:
                label = content
                self.define(alias_name, [label])
            else:
                LOGGER.warning(
                    "Cannot load nor define alias %r for directive: %r",
                    alias_name,
                    directive_str,
                )

        # Path components exist, so this is an alias definition without an
        # internal label
        elif content:
            # We already had a label from either a bracketed span, or
            # implicitly, define alias
            label = content
            self.define(alias_name, [*label_path, label])
        else:
            # No label specified either internally or previously;
            # can't define an alias.
            label = None
            LOGGER.warning(
                "Alias definition %r without a label: %r",
                alias_name,
                directive_str,
            )
        return label_path, label, unreferenced_alias

    def _replace_match(self, match: re.Match[str]) -> str:
        """Returns the replacement text for one alias token match.

        A token naming a defined alias is replaced by the joined label path
        of that alias; any other token is kept as matched.
        """
        name = match.group(1)
        if name and name in self._aliases:
            return self._aliases[name].joined_label_path
        return match.group(0)

    def replace_aliases(self, directive_str: str) -> str:
        """Replaces aliases in a directive by its defined label path.

        Args:
            directive_str: The original directive.

        Returns:
            The directive with replaced aliases.
        """
        # Nothing can be replaced without aliases or without text.
        if not self._aliases or not directive_str:
            return directive_str
        return self._ALIAS_TOKEN_PATTERN.sub(self._replace_match, directive_str)

    def strip_alias(self, directive_str: str) -> tuple[str, str | None, int]:
        """Strips an alias definition from the end of a directive.

        Args:
            directive_str: The original directive.

        Returns:
            A tuple comprising the directive without the alias,
            the found alias name (or `None` in case of no alias directive),
            and the start index of the alias (or `-1` in case of no alias
            directive).
        """
        match = self._ALIAS_DEFINITION_PATTERN.search(directive_str)
        if match is None:
            return directive_str, None, -1
        alias_start = match.start()
        return directive_str[:alias_start], match.group(1), alias_start

202 return directive_str, alias_name, alias_start