Coverage for openhcs/core/steps/abstract.py: 93.3%

28 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 05:57 +0000

1""" 

2Abstract Step Interface 

3 

4This module defines the AbstractStep interface, which is the base class for all steps 

5in the OpenHCS pipeline. It provides the core functionality for step execution, 

6validation, and state management. 

7 

8Doctrinal Clauses: 

9- Clause 3 — Declarative Primacy 

10- Clause 12 — Absolute Clean Execution 

11- Clause 21 — Context Immunity 

12- Clause 65 — No Fallback Logic 

13- Clause 66 — Immutability After Construction 

14- Clause 88 — No Inferred Capabilities 

15- Clause 92 — Structural Validation First 

16- Clause 106-A — Declared Memory Types 

17- Clause 244 — Rot Intolerance 

18- Clause 245 — Declarative Enforcement 

19- Clause 246 — Statelessness Mandate 

20- Clause 251 — Declarative Memory Conversion 

21- Clause 503 — Cognitive Load Transfer 

22""" 

23 

24import abc 

25import logging 

26from abc import abstractmethod 

27from pathlib import Path 

28from typing import TYPE_CHECKING, List, Optional, Union 

29 

30from openhcs.constants.constants import VariableComponents, GroupBy, DEFAULT_VARIABLE_COMPONENTS 

31from openhcs.constants.input_source import InputSource 

32from openhcs.core.config import PathPlanningConfig, MaterializationPathConfig 

33 

34# ProcessingContext is used in type hints 

35if TYPE_CHECKING: 35 ↛ 36line 35 didn't jump to line 36 because the condition on line 35 was never true

36 from openhcs.core.context.processing_context import ProcessingContext 

37# StepResult is no longer returned by process() 

38 

39 

def get_step_id(step: 'AbstractStep') -> str:
    """
    Derive a step's ID from the identity of the step object itself.

    The ID is the decimal string form of ``id(step)``. Because it is
    computed from the object reference, a step does not need to carry its
    ID as an attribute, which supports stateless execution.

    Args:
        step: The step object whose ID is requested.

    Returns:
        The string form of ``id(step)``.

    Note:
        AbstractStep.__init__() computes its ``step_id`` with the same
        expression, so the value produced here matches the one assigned
        at construction for the same live object. The value is tied to
        the object's identity: it is only meaningful while that object
        is alive in the current process.
    """
    object_identity = id(step)
    return f"{object_identity}"

59 

60 

class AbstractStep(abc.ABC):
    """
    Abstract base class for all steps in the OpenHCS pipeline.

    This class defines the interface that all steps must implement.
    Steps are stateful during pipeline definition and compilation (holding attributes
    like name, input/output memory types, etc.). After compilation, these attributes
    are stripped by the StepAttributeStripper, and the step instances become
    stateless shells. During execution, steps operate solely based on the
    ProcessingContext (which is frozen) and their specific plan within
    context.step_plans.

    Input Source Control:

    The input_source parameter controls where a step reads its input data:

    - InputSource.PREVIOUS_STEP (default): Standard pipeline chaining where the step
      reads from the output directory of the previous step. This maintains normal
      sequential data flow.

    - InputSource.PIPELINE_START: The step reads from the original pipeline input
      directory, bypassing all previous step outputs. This replaces the @chain_breaker
      decorator functionality and is used for position generation and quality control.

    Usage Examples:

    Standard processing step (default):
    ```python
    step = FunctionStep(
        func=my_processing_function,
        name="process_images"
        # input_source defaults to InputSource.PREVIOUS_STEP
    )
    ```

    Position generation accessing original images:
    ```python
    step = FunctionStep(
        func=ashlar_compute_tile_positions_gpu,
        name="compute_positions",
        input_source=InputSource.PIPELINE_START
    )
    ```

    # Clause 3 — Declarative Primacy
    # Clause 66 — Immutability After Construction
    # Clause 88 — No Inferred Capabilities
    # Clause 106-A — Declared Memory Types
    # Clause 246 — Statelessness Mandate
    # Clause 251 — Declarative Memory Conversion
    # Clause 503 — Cognitive Load Transfer
    """

    # Step metadata - these are primarily used during pipeline definition and compilation
    step_id: str
    enabled: bool = True
    description: Optional[str] = None
    name: str  # Made non-optional, defaults to class name

    # Attributes like input_memory_type, output_memory_type, etc.,
    # are defined in concrete subclasses (e.g., FunctionStep) as needed.

    def __init__(
        self,
        *,  # Force keyword-only arguments
        name: Optional[str] = None,
        variable_components: List[VariableComponents] = DEFAULT_VARIABLE_COMPONENTS,
        group_by: Optional[GroupBy] = None,
        __input_dir__: Optional[Union[str, Path]] = None,  # Internal: Used during path planning
        __output_dir__: Optional[Union[str, Path]] = None,  # Internal: Used during path planning
        input_source: InputSource = InputSource.PREVIOUS_STEP,
        materialization_config: Optional['MaterializationPathConfig'] = None
    ) -> None:
        """
        Initialize a step. These attributes are primarily used during the
        pipeline definition and compilation phase. After compilation, step
        instances are stripped of these attributes by StepAttributeStripper
        to enforce statelessness during execution.

        Args:
            name: Human-readable name for the step. Defaults to class name
                (also used when an empty/falsy name is passed).
            variable_components: List of variable components for this step.
                A list argument is shallow-copied so the step never aliases
                the caller's list or the shared module-level default.
            group_by: Optional grouping hint for step execution.
            __input_dir__: Internal hint for input directory, used by path planner.
                Dunder naming indicates this is a compiler-internal field.
            __output_dir__: Internal hint for output directory, used by path planner.
                Dunder naming indicates this is a compiler-internal field.
            input_source: Input source strategy for this step. Defaults to PREVIOUS_STEP
                for normal pipeline chaining. Use PIPELINE_START to access
                original input data (replaces @chain_breaker decorator).
            materialization_config: Optional PathPlanningConfig or MaterializationPathConfig
                for per-step materialized output. When provided, enables saving
                materialized copy of step output to custom location in addition
                to normal memory backend processing. Use MaterializationPathConfig()
                for safe defaults that prevent path collisions.
        """
        self.name = name or self.__class__.__name__

        # The default value is a single module-level object evaluated once at
        # definition time. Storing it by reference would let a mutation made
        # through one step leak into every other step (and into the constant),
        # so list inputs are copied. Non-list values are stored unchanged to
        # stay backward-compatible with existing callers.
        if isinstance(variable_components, list):
            variable_components = list(variable_components)
        self.variable_components = variable_components

        self.group_by = group_by
        self.__input_dir__ = __input_dir__
        self.__output_dir__ = __output_dir__
        self.input_source = input_source
        self.materialization_config = materialization_config

        # Generate a step_id based on object id at instantiation.
        # This ID is used to link the step object to its plan in the context.
        # Must stay identical to the expression used by get_step_id().
        self.step_id = str(id(self))

        # Lazy %-style arguments: the message is only formatted when DEBUG
        # logging is actually enabled. The rendered text is unchanged.
        logging.getLogger(__name__).debug(
            "Created step '%s' (type: %s) with ID %s",
            self.name,
            self.__class__.__name__,
            self.step_id,
        )

    @abc.abstractmethod
    def process(self, context: 'ProcessingContext') -> None:
        """
        Process the step with the given context.

        This method must be implemented by all step subclasses.
        During execution, the step instance is stateless. All necessary
        configuration and paths are retrieved from `context.step_plans[self.step_id]`.
        The `context` itself is frozen and must not be modified.
        Outputs are written to VFS via `context.filemanager` based on the step's plan.
        This method returns None.

        Args:
            context: The frozen ProcessingContext containing all required fields,
                including step_plans and filemanager.

        Raises:
            NotImplementedError: If this base implementation is invoked
                directly (e.g. through super().process(context)).

        # Clause 246 — Statelessness Mandate
        # Clause 21 — Context Immunity (Context is read-only for steps)
        """
        raise NotImplementedError("AbstractStep.process() must be implemented by subclasses")