Coverage for openhcs/textual_tui/widgets/shared/signature_analyzer.py: 0.0%
339 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 05:57 +0000
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 05:57 +0000
1# File: openhcs/textual_tui/widgets/shared/signature_analyzer.py
3import ast
4import inspect
5import dataclasses
6import re
7from typing import Any, Dict, Callable, get_type_hints, NamedTuple, Union, Optional, Type
8from dataclasses import dataclass
@dataclass(frozen=True)
class AnalysisConstants:
    """Immutable bundle of the string literals used by signature analysis.

    Collecting them here keeps magic strings out of the analysis code.
    """

    # Suffix that the ``__qualname__`` of a constructor ends with.
    INIT_METHOD_SUFFIX: str = ".__init__"

    # Names of the implicit first parameter of instance and class methods.
    SELF_PARAM: str = "self"
    CLS_PARAM: str = "cls"

    # Leading and trailing markers of a double-underscore ("dunder") name.
    DUNDER_PREFIX: str = "__"
    DUNDER_SUFFIX: str = "__"


# Single shared instance of the constants, used throughout the module.
CONSTANTS = AnalysisConstants()
class ParameterInfo(NamedTuple):
    """Information about a single analyzed parameter or dataclass field.

    Attributes:
        name: Parameter (or field) name.
        param_type: Resolved type hint. This is not always a ``type``
            instance: un-annotated callable parameters are reported as
            ``typing.Any`` and generic hints are ``typing`` constructs,
            hence the ``Any`` annotation.
        default_value: Default value, or ``None`` when there is none.
        is_required: ``True`` when no default is available.
        description: Human-readable description, when one was found.
    """

    name: str
    param_type: Any
    default_value: Any
    is_required: bool
    description: Optional[str] = None
class DocstringInfo(NamedTuple):
    """Information extracted from a docstring.

    Every field defaults to ``None``. In particular ``parameters`` is
    ``None`` (not an empty dict) on a default-constructed instance, so
    consumers must guard against it, e.g. ``(info.parameters or {})``.

    Attributes:
        summary: First line or brief description.
        description: Full description.
        parameters: Mapping of parameter name to its description.
        returns: Return value description.
        examples: Usage examples.
    """

    summary: Optional[str] = None
    description: Optional[str] = None
    parameters: Optional[Dict[str, str]] = None
    returns: Optional[str] = None
    examples: Optional[str] = None
class DocstringExtractor:
    """Extract structured information from docstrings."""

    # Lower-cased section header (trailing colon removed) -> parser section.
    _SECTION_HEADERS = {
        'args': 'parameters',
        'arguments': 'parameters',
        'parameters': 'parameters',
        'returns': 'returns',
        'return': 'returns',
        'examples': 'examples',
        'example': 'examples',
    }

    # Recognized "start of a parameter entry" line shapes. The three
    # patterns begin with different characters, so at most one can match.
    _PARAM_LINE_PATTERNS = (
        # Google ("name: desc") and NumPy ("name : type") style.
        re.compile(r'^(\w+)\s*:\s*(.+)'),
        # Sphinx style (":param name: desc").
        re.compile(r'^:param\s+(\w+):\s*(.+)'),
        # Bulleted entries ("- name: desc", "* name: desc", "• name: desc").
        re.compile(r'^[-•*]\s*(\w+):\s*(.+)'),
    )

    # "name: text" pairs packed onto one line; each description runs up to
    # the next "word:" token or the end of the line.
    _INLINE_PARAM_PATTERN = re.compile(r'(\w+):\s*([^:]*?)(?=\s+\w+:|$)')

    @staticmethod
    def extract(target: Union[Callable, type]) -> DocstringInfo:
        """Extract docstring information from a function or class.

        Args:
            target: Function, method, or class to extract the docstring from.

        Returns:
            DocstringInfo with the parsed components. When ``target`` is
            falsy or has no docstring, every text field is ``None`` and
            ``parameters`` is an empty dict, so callers can always call
            ``.parameters.get(...)`` on the result.
        """
        if not target:
            return DocstringInfo(parameters={})

        docstring = inspect.getdoc(target)
        if not docstring:
            return DocstringInfo(parameters={})

        # The AST path falls back to plain text parsing on its own.
        return DocstringExtractor._parse_docstring_ast(target, docstring)

    @staticmethod
    def _parse_docstring_ast(target: Union[Callable, type], docstring: str) -> DocstringInfo:
        """Parse ``docstring`` after locating its owner node in the source AST.

        The source of ``target`` is parsed and searched for the function or
        class node carrying exactly this docstring; that node is handed to
        ``_parse_ast_docstring``. If the source is unavailable, cannot be
        parsed, or no node matches, the text parser is used directly.

        Args:
            target: Object whose source code is inspected.
            docstring: Cleaned docstring text of ``target``.

        Returns:
            DocstringInfo with the parsed components.
        """
        try:
            import textwrap

            # Source of methods and nested definitions is indented and would
            # be rejected by ast.parse unless it is dedented first.
            source = textwrap.dedent(inspect.getsource(target))
            tree = ast.parse(source)
        except Exception:
            # Best effort: no usable source (builtins, REPL, odd layout).
            return DocstringExtractor._parse_docstring(docstring)

        documented = (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)
        for node in ast.walk(tree):
            if isinstance(node, documented) and ast.get_docstring(node) == docstring:
                return DocstringExtractor._parse_ast_docstring(node, docstring)

        return DocstringExtractor._parse_docstring(docstring)

    @staticmethod
    def _parse_ast_docstring(
        node: Union[ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef],
        docstring: str,
    ) -> DocstringInfo:
        """Parse the docstring belonging to an AST node.

        Extension point: ``node`` is currently unused and parsing is
        delegated to the text-based parser.
        """
        return DocstringExtractor._parse_docstring(docstring)

    @staticmethod
    def _match_section_header(lines, index: int) -> Optional[str]:
        """Return the section introduced by ``lines[index]``, or ``None``.

        A header is either a known keyword followed by a colon ("Args:") or
        a bare keyword whose next line starts with a dash (NumPy underline).
        Matching is case-insensitive.
        """
        lowered = lines[index].strip().lower()
        headers = DocstringExtractor._SECTION_HEADERS

        if lowered.endswith(':'):
            return headers.get(lowered[:-1])

        if lowered in headers:
            following = index + 1
            if following < len(lines) and lines[following].strip().startswith('-'):
                return headers[lowered]
        return None

    @staticmethod
    def _match_param_line(line: str):
        """Return the regex match if ``line`` starts a parameter entry, else ``None``."""
        for pattern in DocstringExtractor._PARAM_LINE_PATTERNS:
            match = pattern.match(line)
            if match:
                return match
        return None

    @staticmethod
    def _parse_docstring(docstring: str) -> DocstringInfo:
        """Parse a docstring into structured components.

        Recognized layouts:
        - Google style section headers (Args:, Returns:, Examples:)
        - NumPy style section headers (keyword underlined with dashes)
        - Simple format (summary and description only)

        Parameter entries are only recognized inside a parameters section;
        this includes Sphinx ``:param name:`` lines and bulleted entries.
        A parameter description continues over following lines until a
        blank line, the next parameter entry, or the next section header.

        Args:
            docstring: Docstring text to parse.

        Returns:
            DocstringInfo whose ``parameters`` is always a dict (possibly
            empty). ``summary`` is the first non-empty line.
        """
        lines = docstring.strip().split('\n')

        summary = None
        description_lines = []
        parameters = {}
        returns = None
        examples = None

        current_section = 'description'
        current_param = None
        current_param_lines = []
        after_header = False  # True only for the line right after a header

        def _finalize_current_param():
            """Store the description collected for the current parameter."""
            if current_param and current_param_lines:
                parameters[current_param] = '\n'.join(current_param_lines).strip()

        for i, original_line in enumerate(lines):
            line = original_line.strip()

            if after_header:
                after_header = False
                # Drop a dashed underline so it does not end up in the
                # section text (it used to leak into returns/examples).
                if line and not line.strip('-'):
                    continue

            section = DocstringExtractor._match_section_header(lines, i)
            if section is not None:
                _finalize_current_param()
                current_param = None
                current_param_lines = []
                current_section = section
                after_header = True
                continue

            if current_section == 'description':
                if not summary and line:
                    summary = line
                else:
                    description_lines.append(original_line)  # Keep original indentation

            elif current_section == 'parameters':
                match = DocstringExtractor._match_param_line(line)
                if match:
                    _finalize_current_param()
                    param_name, param_desc = match.groups()
                    current_param = param_name
                    current_param_lines = [param_desc.strip()]
                elif not line:
                    # A blank (or whitespace-only) line ends the entry.
                    _finalize_current_param()
                    current_param = None
                    current_param_lines = []
                elif current_param:
                    # Continuation of the current description, indented or not.
                    current_param_lines.append(line)
                else:
                    # No open entry: try several "name: text" pairs on one line.
                    parameters.update(DocstringExtractor._parse_inline_parameters(line))

            elif current_section == 'returns':
                returns = line if returns is None else returns + '\n' + line

            elif current_section == 'examples':
                examples = line if examples is None else examples + '\n' + line

        _finalize_current_param()

        description = '\n'.join(description_lines).strip()
        if description == summary:
            description = None

        return DocstringInfo(
            summary=summary,
            description=description,
            parameters=parameters,
            returns=returns,
            examples=examples,
        )

    @staticmethod
    def _parse_inline_parameters(line: str) -> Dict[str, str]:
        """Parse several parameter definitions packed onto a single line.

        Handles formats like:
        - "input_image: Image Input image to process. footprint: Image Structuring element..."
        - "param1: type1 description1. param2: type2 description2."

        Args:
            line: One line of text.

        Returns:
            Mapping of parameter name to description, with surrounding
            whitespace and trailing periods removed. Entries with an empty
            description are omitted.
        """
        parameters = {}
        for param_name, param_desc in DocstringExtractor._INLINE_PARAM_PATTERN.findall(line):
            cleaned = param_desc.strip()
            if cleaned:
                parameters[param_name] = cleaned.rstrip('.')
        return parameters
class SignatureAnalyzer:
    """Universal analyzer for extracting parameter information from any target."""

    # scikit-image submodules searched for the original of a registry wrapper.
    _SKIMAGE_MODULES = (
        'skimage.filters', 'skimage.morphology',
        'skimage.segmentation', 'skimage.feature',
        'skimage.measure', 'skimage.transform',
        'skimage.restoration', 'skimage.exposure',
    )

    # field: type = value  # "Documentation"
    _COMMENT_DOC_PATTERN = re.compile(r'#\s*["\']([^"\']+)["\']')
    # A triple-quoted string on the field's own line.
    _TRIPLE_QUOTE_DOC_PATTERN = re.compile(r'"""([^"]+)"""|\'\'\'([^\']+)\'\'\'')

    @staticmethod
    def analyze(target: Union[Callable, Type, object], skip_first_param: Optional[bool] = None) -> Dict[str, ParameterInfo]:
        """Extract parameter information from any target.

        Args:
            target: Function, constructor, dataclass type, or dataclass instance.
            skip_first_param: Whether to skip the first parameter (after
                self/cls) of a callable. If None, it is auto-detected:
                - False for ``__init__`` methods (all params are configuration)
                - True for every other callable (first param is treated as
                  the data being processed)
                Ignored for dataclass types and instances.

        Returns:
            Dict mapping parameter names to ParameterInfo; empty when
            ``target`` is falsy.
        """
        if not target:
            return {}

        if inspect.isclass(target):
            if dataclasses.is_dataclass(target):
                return SignatureAnalyzer._analyze_dataclass(target)
            # Plain class: analyze its constructor.
            return SignatureAnalyzer._analyze_callable(target.__init__, skip_first_param)

        if dataclasses.is_dataclass(target):
            # Instance of a dataclass.
            return SignatureAnalyzer._analyze_dataclass_instance(target)

        # Function, method, or other callable.
        return SignatureAnalyzer._analyze_callable(target, skip_first_param)

    @staticmethod
    def _analyze_callable(callable_obj: Callable, skip_first_param: Optional[bool] = None) -> Dict[str, ParameterInfo]:
        """Extract parameter information from a callable's signature.

        ``self``/``cls`` and dunder-named parameters are always skipped. A
        ``**kwargs`` parameter is replaced by whatever parameters can be
        recovered from the function it appears to wrap.

        Args:
            callable_obj: The callable to analyze.
            skip_first_param: Whether to skip the first parameter (after
                self/cls). If None, auto-detects based on context.

        Returns:
            Dict mapping parameter names to ParameterInfo, in signature order.

        Raises:
            Exceptions raised by ``inspect.signature`` are not caught here.
        """
        sig = inspect.signature(callable_obj)
        empty = inspect.Parameter.empty

        try:
            type_hints = get_type_hints(callable_obj)
        except Exception:
            # Hints can fail to resolve (e.g. unresolvable forward
            # references, objects get_type_hints does not accept). Fall back
            # to the raw annotations that are real objects, not strings.
            type_hints = {
                name: param.annotation
                for name, param in sig.parameters.items()
                if param.annotation is not empty and not isinstance(param.annotation, str)
            }

        # Docstring descriptions are optional extras: never let them fail
        # the analysis. ``parameters`` may be None on an empty DocstringInfo.
        try:
            docstring_info = DocstringExtractor.extract(callable_obj)
        except Exception:
            docstring_info = None
        param_docs = (docstring_info.parameters if docstring_info else None) or {}

        # Explicit argument overrides auto-detection.
        if skip_first_param is not None:
            should_skip_first_param = skip_first_param
        else:
            should_skip_first_param = SignatureAnalyzer._should_skip_first_parameter(callable_obj)

        parameters = {}
        first_param_skipped = False

        for param_name, param in sig.parameters.items():
            # Always skip self/cls
            if param_name in (CONSTANTS.SELF_PARAM, CONSTANTS.CLS_PARAM):
                continue

            # Always skip dunder parameters (internal/reserved fields)
            if param_name.startswith(CONSTANTS.DUNDER_PREFIX) and param_name.endswith(CONSTANTS.DUNDER_SUFFIX):
                continue

            # Skip the first remaining parameter when requested.
            if should_skip_first_param and not first_param_skipped:
                first_param_skipped = True
                continue

            # **kwargs: try to recover the wrapped function's parameters.
            if param.kind == inspect.Parameter.VAR_KEYWORD:
                original_params = SignatureAnalyzer._extract_original_parameters(callable_obj)
                if original_params:
                    parameters.update(original_params)
                continue

            # Identity comparison: ``!=`` would invoke the default's own
            # __eq__/__ne__, which may raise or return a non-bool
            # (e.g. array-like defaults).
            is_required = param.default is empty

            parameters[param_name] = ParameterInfo(
                name=param_name,
                param_type=type_hints.get(param_name, Any),
                default_value=None if is_required else param.default,
                is_required=is_required,
                description=param_docs.get(param_name),
            )

        return parameters

    @staticmethod
    def _should_skip_first_parameter(callable_obj: Callable) -> bool:
        """Decide whether the first parameter of ``callable_obj`` is skipped.

        - Constructors (``__qualname__`` ending in ``.__init__``): don't
          skip (all params are configuration).
        - All other callables: skip the first param (assumed to be the data
          being processed).
        """
        qualname = getattr(callable_obj, '__qualname__', '')
        return not qualname.endswith(CONSTANTS.INIT_METHOD_SUFFIX)

    @staticmethod
    def _extract_original_parameters(callable_obj: Callable) -> Dict[str, ParameterInfo]:
        """Recover parameters of the function wrapped by a ``**kwargs`` wrapper.

        This handles cases where scikit-image or other auto-registered
        functions are wrapped with ``(image, **kwargs)`` signatures. Three
        strategies are tried in order:

        1. ``__wrapped__`` attribute (functools.wraps).
        2. A callable held in the wrapper's closure that does not itself
           take ``**kwargs``.
        3. A same-named function in a fixed list of scikit-image modules.

        Returns:
            Parameters of the original function, or an empty dict when
            nothing could be recovered. Never raises ``Exception``.
        """
        try:
            # Pattern 1: functools.wraps wrapper.
            if hasattr(callable_obj, '__wrapped__'):
                return SignatureAnalyzer._analyze_callable(callable_obj.__wrapped__)

            # Pattern 2: original function referenced from the closure.
            for cell in getattr(callable_obj, '__closure__', None) or ():
                try:
                    candidate = cell.cell_contents
                except ValueError:
                    # Empty cell; reading it must not abort the other patterns.
                    continue
                if not callable(candidate):
                    continue
                try:
                    candidate_sig = inspect.signature(candidate)
                    # Skip if it also has **kwargs (avoid infinite recursion).
                    if any(p.kind == inspect.Parameter.VAR_KEYWORD
                           for p in candidate_sig.parameters.values()):
                        continue
                    return SignatureAnalyzer._analyze_callable(candidate)
                except Exception:
                    continue

            # Pattern 3: look the function up by name in scikit-image.
            func_name = getattr(callable_obj, '__name__', None)
            module_name = getattr(callable_obj, '__module__', None)
            # NOTE(review): both substrings are required, as in the original
            # logic; 'scikit_image_registry' alone does not contain
            # 'skimage', so confirm this branch is reachable for the
            # intended wrapper modules.
            if (isinstance(func_name, str) and isinstance(module_name, str)
                    and 'skimage' in module_name
                    and 'scikit_image_registry' in module_name):
                import importlib

                for skimage_module in SignatureAnalyzer._SKIMAGE_MODULES:
                    try:
                        mod = importlib.import_module(skimage_module)
                        if hasattr(mod, func_name):
                            return SignatureAnalyzer._analyze_callable(getattr(mod, func_name))
                    except Exception:
                        continue

            return {}

        except Exception:
            return {}

    @staticmethod
    def _analyze_dataclass(dataclass_type: type) -> Dict[str, ParameterInfo]:
        """Extract parameter information from dataclass fields.

        Field descriptions are taken from, in priority order: the field's
        ``metadata['description']``, an inline documentation string next to
        the field, and the class docstring's parameter section.

        Note that a field's ``default_factory`` is called to obtain its
        default value.

        Returns:
            Dict mapping field names to ParameterInfo, or an empty dict if
            anything fails.
        """
        try:
            type_hints = get_type_hints(dataclass_type)

            docstring_info = DocstringExtractor.extract(dataclass_type)
            # ``parameters`` may be None on an empty DocstringInfo.
            docstring_docs = (docstring_info.parameters if docstring_info else None) or {}

            inline_docs = SignatureAnalyzer._extract_inline_field_docs(dataclass_type)

            parameters = {}
            for field in dataclasses.fields(dataclass_type):
                # MISSING is a sentinel: compare by identity so a default's
                # own __eq__/__ne__ is never invoked.
                if field.default is not dataclasses.MISSING:
                    default_value = field.default
                    is_required = False
                elif field.default_factory is not dataclasses.MISSING:
                    default_value = field.default_factory()
                    is_required = False
                else:
                    default_value = None
                    is_required = True

                if 'description' in field.metadata:
                    # 1. Field metadata (highest priority)
                    field_description = field.metadata['description']
                elif field.name in inline_docs:
                    # 2. Inline documentation strings
                    field_description = inline_docs[field.name]
                else:
                    # 3. Docstring parameters (fallback)
                    field_description = docstring_docs.get(field.name)

                parameters[field.name] = ParameterInfo(
                    name=field.name,
                    param_type=type_hints.get(field.name, str),
                    default_value=default_value,
                    is_required=is_required,
                    description=field_description,
                )

            return parameters

        except Exception:
            # Best effort: return empty dict on error.
            return {}

    @staticmethod
    def _extract_inline_field_docs(dataclass_type: type) -> Dict[str, str]:
        """Extract inline field documentation strings using AST parsing.

        Patterns handled here:

        Pattern 1 - String literal on the statement after the field:
            @dataclass
            class Config:
                field_name: str = "default"
                '''Field description here.'''

        Pattern 2 - Quoted text on the field's own line (less common):
            @dataclass
            class Config:
                field_name: str = "default"  # '''Field description'''

        Descriptions given in the class docstring are not handled here.

        Returns:
            Mapping of field name to documentation text; empty when the
            source is unavailable or cannot be parsed.
        """
        try:
            import textwrap

            # Nested classes come back indented; dedent so ast.parse accepts
            # them instead of silently yielding no documentation.
            source = textwrap.dedent(inspect.getsource(dataclass_type))
            tree = ast.parse(source)

            class_node = None
            for node in ast.walk(tree):
                if isinstance(node, ast.ClassDef) and node.name == dataclass_type.__name__:
                    class_node = node
                    break
            if class_node is None:
                return {}

            field_docs = {}
            source_lines = source.split('\n')
            body = class_node.body

            for index, node in enumerate(body):
                if not (isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name)):
                    continue
                field_name = node.target.id

                # Method 1: the next statement is a bare string literal.
                following = body[index + 1] if index + 1 < len(body) else None
                if (isinstance(following, ast.Expr)
                        and isinstance(following.value, ast.Constant)
                        and isinstance(following.value.value, str)):
                    field_docs[field_name] = following.value.value.strip()
                    continue

                # Method 2: quoted text on the field's own line.
                line_index = node.lineno - 1  # Convert to 0-based indexing
                if not 0 <= line_index < len(source_lines):
                    continue
                line = source_lines[line_index]

                comment_match = SignatureAnalyzer._COMMENT_DOC_PATTERN.search(line)
                if comment_match:
                    field_docs[field_name] = comment_match.group(1).strip()
                    continue

                triple_quote_match = SignatureAnalyzer._TRIPLE_QUOTE_DOC_PATTERN.search(line)
                if triple_quote_match:
                    doc_text = triple_quote_match.group(1) or triple_quote_match.group(2)
                    field_docs[field_name] = doc_text.strip()

            return field_docs

        except Exception:
            # Best effort: no inline docs if the source can't be read/parsed.
            return {}

    @staticmethod
    def _analyze_dataclass_instance(instance: object) -> Dict[str, ParameterInfo]:
        """Extract parameter information from a dataclass instance.

        The result is that of the instance's type, with each
        ``default_value`` replaced by the value currently stored on the
        instance.

        Returns:
            Dict mapping field names to ParameterInfo, or an empty dict if
            anything fails.
        """
        try:
            parameters = SignatureAnalyzer._analyze_dataclass(type(instance))

            # Instances exposing ``_resolve_field_value`` are treated as lazy
            # dataclasses: read the stored value via object.__getattribute__
            # so None placeholders are preserved.
            is_lazy = hasattr(instance, '_resolve_field_value')

            for name, param_info in parameters.items():
                if not hasattr(instance, name):
                    continue
                if is_lazy:
                    current_value = object.__getattribute__(instance, name)
                else:
                    current_value = getattr(instance, name)
                # Replacing the value of an existing key is safe during iteration.
                parameters[name] = param_info._replace(default_value=current_value)

            return parameters

        except Exception:
            return {}