Coverage for openhcs/textual_tui/widgets/shared/signature_analyzer.py: 0.0%

339 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 05:57 +0000

1# File: openhcs/textual_tui/widgets/shared/signature_analyzer.py 

2 

3import ast 

4import inspect 

5import dataclasses 

6import re 

7from typing import Any, Dict, Callable, get_type_hints, NamedTuple, Union, Optional, Type 

8from dataclasses import dataclass 

9 

@dataclass(frozen=True)
class AnalysisConstants:
    """Named string literals used by the signature-analysis helpers.

    The dataclass is frozen, so attributes of an instance cannot be
    reassigned after construction.
    """

    # Suffix that identifies a constructor by its ``__qualname__``.
    INIT_METHOD_SUFFIX: str = ".__init__"

    # Names of the implicit first argument of instance / class methods.
    SELF_PARAM: str = "self"
    CLS_PARAM: str = "cls"

    # Markers that surround a "dunder" name such as ``__example__``.
    DUNDER_PREFIX: str = "__"
    DUNDER_SUFFIX: str = "__"


# Single shared instance referenced by the rest of the module.
CONSTANTS = AnalysisConstants()

22 

23 

class ParameterInfo(NamedTuple):
    """Immutable record describing one analysed parameter or dataclass field."""

    # Name of the parameter / field.
    name: str
    # Resolved type annotation for the parameter.
    param_type: type
    # Declared default; ``None`` is stored when no default exists.
    default_value: Any
    # ``True`` when the parameter has no default.
    is_required: bool
    # Human-readable description taken from documentation, when available.
    description: Optional[str] = None

31 

class DocstringInfo(NamedTuple):
    """Structured pieces of a parsed docstring."""

    # First non-blank line of the docstring.
    summary: Optional[str] = None
    # Free text that follows the summary, before any recognised section.
    description: Optional[str] = None
    # Parameter name -> description. ``DocstringExtractor`` always supplies a
    # dict here; the bare default stays ``None`` because NamedTuple defaults
    # are shared between instances and therefore must not be mutable.
    parameters: Optional[Dict[str, str]] = None
    # Text of the "Returns" section.
    returns: Optional[str] = None
    # Text of the "Examples" section.
    examples: Optional[str] = None


class DocstringExtractor:
    """Extract structured information from docstrings."""

    # Section titles (lower-case, without a trailing colon) -> section key.
    _SECTION_HEADERS = {
        'args': 'parameters',
        'arguments': 'parameters',
        'parameters': 'parameters',
        'returns': 'returns',
        'return': 'returns',
        'examples': 'examples',
        'example': 'examples',
    }

    # Patterns that open a new parameter entry; each captures (name, text).
    # They are mutually exclusive because they differ in their first character.
    _PARAM_PATTERNS = (
        re.compile(r'^:param\s+(\w+):\s*(.+)'),   # Sphinx: ":param name: text"
        re.compile(r'^(\w+)\s*:\s*(.+)'),         # Google "name: text" / NumPy "name : type"
        re.compile(r'^[-•*]\s*(\w+):\s*(.+)'),    # Bulleted: "- name: text"
    )

    @staticmethod
    def extract(target: Union[Callable, type]) -> DocstringInfo:
        """Extract docstring information from a function, method, or class.

        Args:
            target: Function, method, or class to read the docstring from.

        Returns:
            DocstringInfo with the parsed components. When ``target`` is falsy
            or has no docstring, every text field is ``None`` and
            ``parameters`` is an empty dict, so callers can always use
            ``info.parameters.get(name)``.
        """
        docstring = inspect.getdoc(target) if target else None
        if not docstring:
            return DocstringInfo(parameters={})

        # Try the AST-aware entry point first; fall back to plain text parsing.
        try:
            return DocstringExtractor._parse_docstring_ast(target, docstring)
        except Exception:
            return DocstringExtractor._parse_docstring(docstring)

    @staticmethod
    def _parse_docstring_ast(target: Union[Callable, type], docstring: str) -> DocstringInfo:
        """Locate the AST node that owns ``docstring`` and parse from there.

        The source of ``target`` is parsed and searched for a function or
        class node whose docstring equals ``docstring``. Any failure to obtain
        or parse the source falls back to the text parser.
        """
        try:
            tree = ast.parse(inspect.getsource(target))
            for node in ast.walk(tree):
                if not isinstance(node, (ast.FunctionDef, ast.ClassDef)):
                    continue
                if ast.get_docstring(node) == docstring:
                    return DocstringExtractor._parse_ast_docstring(node, docstring)
        except Exception:
            # Best effort: source may be unavailable or not parse on its own
            # (e.g. the indented source of a method).
            pass
        return DocstringExtractor._parse_docstring(docstring)

    @staticmethod
    def _parse_ast_docstring(node: Union[ast.FunctionDef, ast.ClassDef], docstring: str) -> DocstringInfo:
        """Parse the docstring that belongs to an AST node.

        Currently delegates to the text parser; ``node`` is accepted as an
        extension point for AST-specific handling.
        """
        return DocstringExtractor._parse_docstring(docstring)

    @staticmethod
    def _section_for_header(line: str, next_line: Optional[str]) -> Optional[str]:
        """Return the section key if ``line`` is a section header, else ``None``.

        A header is a known title followed by a colon (``Args:``), or a known
        title without a colon whose following line starts with ``-``
        (NumPy-style underline).
        """
        lowered = line.lower()
        if lowered.endswith(':'):
            return DocstringExtractor._SECTION_HEADERS.get(lowered[:-1])
        if next_line is not None and next_line.strip().startswith('-'):
            return DocstringExtractor._SECTION_HEADERS.get(lowered)
        return None

    @staticmethod
    def _parse_docstring(docstring: str) -> DocstringInfo:
        """Parse a docstring into summary, description, parameters, returns, examples.

        Recognised layouts:
        - Google style sections (``Args:``, ``Returns:``, ``Examples:``)
        - NumPy style sections (title underlined with dashes)
        - ``:param name: text`` and bulleted ``- name: text`` entries, but only
          inside a parameters section
        - Plain text (summary plus description)

        A parameter description continues over following lines until a blank
        line, a new parameter entry, or a new section header.

        Args:
            docstring: The docstring text to parse.

        Returns:
            DocstringInfo whose ``parameters`` is always a dict.
        """
        lines = docstring.strip().split('\n')

        summary = None
        description_lines = []
        parameters = {}
        returns_lines = []
        examples_lines = []

        current_section = 'description'
        current_param = None
        current_param_lines = []
        # True for exactly one line after a header, so that a NumPy underline
        # ("-------") is dropped instead of leaking into the section text.
        separator_may_follow = False

        def _finalize_current_param():
            """Store the parameter entry collected so far, if any."""
            if current_param and current_param_lines:
                parameters[current_param] = '\n'.join(current_param_lines).strip()

        for index, original_line in enumerate(lines):
            line = original_line.strip()

            if separator_may_follow:
                separator_may_follow = False
                if line and not line.strip('-'):
                    continue  # line consists only of dashes

            next_line = lines[index + 1] if index + 1 < len(lines) else None
            section = DocstringExtractor._section_for_header(line, next_line)
            if section is not None:
                _finalize_current_param()
                current_param = None
                current_param_lines = []
                current_section = section
                separator_may_follow = True
                continue

            if current_section == 'description':
                if not summary and line:
                    summary = line
                else:
                    description_lines.append(original_line)  # keep indentation

            elif current_section == 'parameters':
                match = None
                for pattern in DocstringExtractor._PARAM_PATTERNS:
                    match = pattern.match(line)
                    if match:
                        break

                if match:
                    _finalize_current_param()
                    current_param, first_text = match.groups()
                    current_param_lines = [first_text.strip()]
                elif current_param and original_line.startswith((' ', '\t')):
                    # Indented continuation of the current entry.
                    current_param_lines.append(line)
                elif not line:
                    # Blank line closes the current entry.
                    _finalize_current_param()
                    current_param = None
                    current_param_lines = []
                elif current_param:
                    # Unindented continuation within the same block.
                    current_param_lines.append(line)
                else:
                    # No open entry: try several "name: text" pairs on one line.
                    parameters.update(DocstringExtractor._parse_inline_parameters(line))

            elif current_section == 'returns':
                returns_lines.append(line)

            elif current_section == 'examples':
                examples_lines.append(line)

        _finalize_current_param()

        description = '\n'.join(description_lines).strip()
        if description == summary:
            description = None

        return DocstringInfo(
            summary=summary,
            description=description,
            parameters=parameters,
            returns='\n'.join(returns_lines).strip() or None,
            examples='\n'.join(examples_lines).strip() or None,
        )

    @staticmethod
    def _parse_inline_parameters(line: str) -> Dict[str, str]:
        """Parse several ``name: text`` pairs that share a single line.

        Handles input such as
        ``"input_image: Image Input image. footprint: Image Structuring element"``.
        Each description runs up to the next ``name:`` token or the end of the
        line; trailing periods are removed and empty descriptions are skipped.

        Args:
            line: One line of text.

        Returns:
            Mapping of parameter name to cleaned description.
        """
        param_pattern = r'(\w+):\s*([^:]*?)(?=\s+\w+:|$)'

        found = {}
        for param_name, param_desc in re.findall(param_pattern, line):
            stripped = param_desc.strip()
            if stripped:
                found[param_name] = stripped.rstrip('.')
        return found

281 

282 

class SignatureAnalyzer:
    """Universal analyzer for extracting parameter information from any target."""

    @staticmethod
    def analyze(target: Union[Callable, Type, object], skip_first_param: Optional[bool] = None) -> Dict[str, ParameterInfo]:
        """Extract parameter information from a function, class, dataclass, or dataclass instance.

        Args:
            target: Function, class, dataclass type, or dataclass instance.
            skip_first_param: Whether to drop the first parameter (after
                self/cls). If None, it is auto-detected: kept for
                ``__init__`` methods, dropped for every other callable.
                Ignored for dataclass types and instances.

        Returns:
            Dict mapping parameter names to ParameterInfo; empty when
            ``target`` is falsy.
        """
        if not target:
            return {}

        if inspect.isclass(target):
            if dataclasses.is_dataclass(target):
                return SignatureAnalyzer._analyze_dataclass(target)
            # Ordinary class: describe its constructor.
            return SignatureAnalyzer._analyze_callable(target.__init__, skip_first_param)

        if dataclasses.is_dataclass(target):
            # Not a class but still a dataclass -> an instance.
            return SignatureAnalyzer._analyze_dataclass_instance(target)

        return SignatureAnalyzer._analyze_callable(target, skip_first_param)

    @staticmethod
    def _analyze_callable(callable_obj: Callable, skip_first_param: Optional[bool] = None) -> Dict[str, ParameterInfo]:
        """Extract parameter information from a callable's signature.

        Args:
            callable_obj: The callable to analyze.
            skip_first_param: Whether to drop the first parameter (after
                self/cls). If None, auto-detected from the callable.

        Returns:
            Dict mapping parameter names to ParameterInfo.

        Raises:
            Whatever ``inspect.signature`` raises when the callable has no
            retrievable signature.
        """
        sig = inspect.signature(callable_obj)

        # Unresolvable annotations (e.g. forward references) must not abort
        # the analysis; such parameters are simply typed as Any.
        try:
            type_hints = get_type_hints(callable_obj)
        except Exception:
            type_hints = {}

        # Docstring descriptions are optional extras.
        try:
            docstring_info = DocstringExtractor.extract(callable_obj)
        except Exception:
            docstring_info = None
        # ``parameters`` may be None (e.g. a default-constructed DocstringInfo
        # for an undocumented callable), so normalise to a dict.
        documented = (docstring_info.parameters if docstring_info else None) or {}

        # An explicit argument overrides auto-detection.
        if skip_first_param is not None:
            skip_pending = skip_first_param
        else:
            skip_pending = SignatureAnalyzer._should_skip_first_parameter(callable_obj)

        parameters = {}
        for param_name, param in sig.parameters.items():
            # Always skip self/cls.
            if param_name in (CONSTANTS.SELF_PARAM, CONSTANTS.CLS_PARAM):
                continue

            # Always skip dunder parameters (internal/reserved fields).
            if param_name.startswith(CONSTANTS.DUNDER_PREFIX) and param_name.endswith(CONSTANTS.DUNDER_SUFFIX):
                continue

            # Drop the first remaining parameter when requested.
            if skip_pending:
                skip_pending = False
                continue

            # **kwargs: try to recover the parameters of the wrapped function.
            if param.kind is inspect.Parameter.VAR_KEYWORD:
                original_params = SignatureAnalyzer._extract_original_parameters(callable_obj)
                if original_params:
                    parameters.update(original_params)
                continue

            # Identity check: defaults with a non-boolean ``__eq__`` (such as
            # arrays) cannot be compared to the sentinel with ``==``/``!=``.
            has_default = param.default is not inspect.Parameter.empty

            parameters[param_name] = ParameterInfo(
                name=param_name,
                param_type=type_hints.get(param_name, Any),
                default_value=param.default if has_default else None,
                is_required=not has_default,
                description=documented.get(param_name),
            )

        return parameters

    @staticmethod
    def _should_skip_first_parameter(callable_obj: Callable) -> bool:
        """Decide whether the first parameter of ``callable_obj`` is dropped.

        - ``__init__`` methods (``__qualname__`` ends with ``.__init__``):
          keep every parameter.
        - Any other callable: drop the first parameter.
        """
        qualname = getattr(callable_obj, '__qualname__', None)
        is_constructor = isinstance(qualname, str) and qualname.endswith(CONSTANTS.INIT_METHOD_SUFFIX)
        return not is_constructor

    @staticmethod
    def _extract_original_parameters(callable_obj: Callable) -> Dict[str, ParameterInfo]:
        """Recover parameters of the function hidden behind a ``**kwargs`` wrapper.

        Strategies, tried in order:
        1. ``__wrapped__`` (set by ``functools.wraps``).
        2. A callable stored in the wrapper's closure that does not itself
           take ``**kwargs``.
        3. For wrappers whose module name contains both ``skimage`` and
           ``scikit_image_registry``: a same-named function in a fixed list
           of ``skimage`` submodules.

        Returns:
            Parameter mapping of the original function, or an empty dict when
            none is found or any step fails.
        """
        try:
            # Strategy 1: functools.wraps-style wrapper.
            if hasattr(callable_obj, '__wrapped__'):
                return SignatureAnalyzer._analyze_callable(callable_obj.__wrapped__)

            # Strategy 2: callable captured in the closure.
            for cell in getattr(callable_obj, '__closure__', None) or ():
                try:
                    candidate = cell.cell_contents
                except ValueError:
                    continue  # empty cell; keep scanning the others
                if not callable(candidate):
                    continue
                try:
                    candidate_sig = inspect.signature(candidate)
                    # A candidate that also takes **kwargs would recurse.
                    if any(p.kind is inspect.Parameter.VAR_KEYWORD
                           for p in candidate_sig.parameters.values()):
                        continue
                    return SignatureAnalyzer._analyze_callable(candidate)
                except Exception:
                    continue

            # Strategy 3: look the function up by name in skimage.
            func_name = getattr(callable_obj, '__name__', None)
            module_name = getattr(callable_obj, '__module__', None)
            if (func_name and module_name
                    and 'skimage' in module_name
                    and 'scikit_image_registry' in module_name):
                import importlib

                for skimage_module in ('skimage.filters', 'skimage.morphology',
                                       'skimage.segmentation', 'skimage.feature',
                                       'skimage.measure', 'skimage.transform',
                                       'skimage.restoration', 'skimage.exposure'):
                    try:
                        mod = importlib.import_module(skimage_module)
                        if hasattr(mod, func_name):
                            return SignatureAnalyzer._analyze_callable(getattr(mod, func_name))
                    except Exception:
                        continue

            return {}

        except Exception:
            return {}

    @staticmethod
    def _analyze_dataclass(dataclass_type: type) -> Dict[str, ParameterInfo]:
        """Extract parameter information from the fields of a dataclass type.

        Field descriptions are taken from, in priority order: the field's
        ``metadata['description']``, an inline documentation string found in
        the class source, then the class docstring's parameter section.

        Returns:
            Dict mapping field names to ParameterInfo; empty if anything
            fails (best effort).
        """
        try:
            type_hints = get_type_hints(dataclass_type)
            docstring_info = DocstringExtractor.extract(dataclass_type)
            documented = (docstring_info.parameters if docstring_info else None) or {}
            inline_docs = SignatureAnalyzer._extract_inline_field_docs(dataclass_type)

            parameters = {}
            for field in dataclasses.fields(dataclass_type):
                # MISSING is a sentinel: compare by identity.
                if field.default is not dataclasses.MISSING:
                    default_value, is_required = field.default, False
                elif field.default_factory is not dataclasses.MISSING:
                    default_value, is_required = field.default_factory(), False
                else:
                    default_value, is_required = None, True

                if hasattr(field, 'metadata') and 'description' in field.metadata:
                    field_description = field.metadata['description']
                elif field.name in inline_docs:
                    field_description = inline_docs[field.name]
                else:
                    field_description = documented.get(field.name)

                parameters[field.name] = ParameterInfo(
                    name=field.name,
                    param_type=type_hints.get(field.name, str),
                    default_value=default_value,
                    is_required=is_required,
                    description=field_description,
                )

            return parameters

        except Exception:
            return {}

    @staticmethod
    def _extract_inline_field_docs(dataclass_type: type) -> Dict[str, str]:
        """Extract per-field documentation strings from a dataclass's source.

        Recognised forms:

        1. A string literal statement directly after the field::

               field_name: str = "default"
               '''Field description here.'''

        2. A quoted string in a comment on the field's line::

               field_name: str = "default"  # "Field description"

        3. A triple-quoted string anywhere on the field's line.

        Parameters documented in the class docstring are not handled here.

        Returns:
            Mapping of field name to description; empty if the source cannot
            be retrieved or parsed.
        """
        try:
            import textwrap

            # Dedent so that classes defined inside another scope still parse.
            source = textwrap.dedent(inspect.getsource(dataclass_type))
            tree = ast.parse(source)

            class_node = None
            for node in ast.walk(tree):
                if isinstance(node, ast.ClassDef) and node.name == dataclass_type.__name__:
                    class_node = node
                    break
            if not class_node:
                return {}

            field_docs = {}
            source_lines = source.split('\n')
            body = class_node.body

            for position, node in enumerate(body):
                if not (isinstance(node, ast.AnnAssign) and hasattr(node.target, 'id')):
                    continue
                field_name = node.target.id

                # Form 1: next statement is a bare string literal.
                following = body[position + 1] if position + 1 < len(body) else None
                if (isinstance(following, ast.Expr)
                        and isinstance(following.value, ast.Constant)
                        and isinstance(following.value.value, str)):
                    field_docs[field_name] = following.value.value.strip()
                    continue

                # Forms 2 and 3 inspect the raw text of the field's own line.
                line_index = node.lineno - 1  # AST line numbers are 1-based
                if not 0 <= line_index < len(source_lines):
                    continue
                line = source_lines[line_index]

                comment_match = re.search(r'#\s*["\']([^"\']+)["\']', line)
                if comment_match:
                    field_docs[field_name] = comment_match.group(1).strip()
                    continue

                triple_quote_match = re.search(r'"""([^"]+)"""|\'\'\'([^\']+)\'\'\'', line)
                if triple_quote_match:
                    doc_text = triple_quote_match.group(1) or triple_quote_match.group(2)
                    field_docs[field_name] = doc_text.strip()

            return field_docs

        except Exception:
            # Best effort: no source or unparsable source means no inline docs.
            return {}

    @staticmethod
    def _analyze_dataclass_instance(instance: object) -> Dict[str, ParameterInfo]:
        """Extract parameter information from a dataclass instance.

        The instance's type is analyzed, then each ``default_value`` is
        replaced by the value currently stored on the instance.

        Returns:
            Dict mapping field names to ParameterInfo; empty if anything fails.
        """
        try:
            parameters = SignatureAnalyzer._analyze_dataclass(type(instance))

            # Instances exposing ``_resolve_field_value`` are read with
            # object.__getattribute__ so the stored value (possibly None) is
            # returned rather than going through the instance's own lookup.
            bypass_lookup = hasattr(instance, '_resolve_field_value')

            for name, param_info in parameters.items():
                if not hasattr(instance, name):
                    continue
                if bypass_lookup:
                    current_value = object.__getattribute__(instance, name)
                else:
                    current_value = getattr(instance, name)
                parameters[name] = param_info._replace(default_value=current_value)

            return parameters

        except Exception:
            return {}

642 

643 # Duplicate method removed - using the fixed version above