Coverage for openhcs/io/metadata_migration.py: 9.7%

118 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 05:57 +0000

1""" 

2OpenHCS Legacy Metadata Migration Utilities 

3 

4This module provides utilities to migrate old OpenHCS metadata files from the flat format 

5with absolute paths to the new subdirectory-keyed format with relative paths. 

6 

7The migration handles: 

8- Converting flat metadata structure to subdirectory-keyed format 

9- Converting absolute paths to relative paths 

10- Renaming .zarr directories to clean names 

11- Detecting and preserving backend information (disk vs zarr) 

12- Creating atomic backups during migration 

13 

14Usage as module: 

15 from openhcs.io.metadata_migration import migrate_plate_metadata, detect_legacy_format 

16 

17 # Check if migration is needed 

18 if detect_legacy_format(metadata_dict): 

19 success = migrate_plate_metadata(plate_dir) 

20 

21Usage as script: 

22 python -m openhcs.io.metadata_migration /path/to/plate/directory 

23 python -m openhcs.io.metadata_migration /path/to/plate/directory --dry-run 

24""" 

25 

26import argparse 

27import json 

28import logging 

29import os 

30import sys 

31from pathlib import Path 

32from typing import Dict, Any, List 

33 

34from .metadata_writer import METADATA_CONFIG 

35 

# Module-level logger, named after this module's import path
logger = logging.getLogger(__name__)

# Metadata filename comes from the shared METADATA_CONFIG instead of being hard-coded here
METADATA_FILENAME = METADATA_CONFIG.METADATA_FILENAME

40 

41 

def detect_legacy_format(metadata_dict: Dict[str, Any]) -> bool:
    """
    Detect if metadata is in legacy format.

    Legacy format characteristics:
    - No 'subdirectories' key
    - 'image_files' contains absolute paths

    Only the first entry of 'image_files' is inspected. The absolute-path
    check accepts both POSIX-style and Windows-style (drive letter or UNC)
    paths, so the result does not depend on the operating system the check
    happens to run on.

    Args:
        metadata_dict: Loaded metadata dictionary

    Returns:
        True if legacy format detected, False otherwise. Input that is not a
        dictionary, or whose 'image_files' is not a non-empty list/tuple
        starting with a string, is reported as not legacy instead of raising.
    """
    # Local import: the module only imports Path at file level
    from pathlib import PurePosixPath, PureWindowsPath

    # A JSON document whose top level is not an object cannot be legacy metadata
    if not isinstance(metadata_dict, dict):
        return False

    # New format has subdirectories key
    if "subdirectories" in metadata_dict:
        return False

    image_files = metadata_dict.get("image_files")
    if not isinstance(image_files, (list, tuple)) or not image_files:
        return False

    first_entry = image_files[0]
    if not isinstance(first_entry, str):
        return False

    # If first file path is absolute under either path flavour, assume legacy format
    return (
        PurePosixPath(first_entry).is_absolute()
        or PureWindowsPath(first_entry).is_absolute()
    )

67 

68 

69 

def _rename_zarr_directories(plate_root: Path, dry_run: bool = False) -> Dict[str, str]:
    """
    Rename any directories containing '.zarr' in their name to remove it.

    Every occurrence of '.zarr' in the directory name is removed. A directory
    is skipped (with a warning) when the cleaned name already exists inside
    plate_root. Only direct children of plate_root are considered.

    Args:
        plate_root: Root directory of the plate
        dry_run: If True, only simulate the renames (they are logged and
            reported in the result, but nothing on disk changes)

    Returns:
        Dictionary mapping old names to new names, in sorted order of the
        old directory paths
    """
    renames: Dict[str, str] = {}

    # Materialise the listing before renaming anything: what iterdir() yields
    # for entries added/removed during iteration is unspecified. Sorting also
    # makes the order of the returned mapping deterministic.
    for item in sorted(plate_root.iterdir()):
        if not item.is_dir() or '.zarr' not in item.name:
            continue

        old_name = item.name
        new_name = old_name.replace('.zarr', '')
        new_path = plate_root / new_name

        # Only rename if target doesn't already exist
        if new_path.exists():
            logger.warning(f"Cannot rename {old_name} to {new_name}: target already exists")
            continue

        if dry_run:
            logger.info(f"DRY RUN: Would rename directory: {old_name} -> {new_name}")
        else:
            logger.info(f"Renaming directory: {old_name} -> {new_name}")
            item.rename(new_path)
        # Recorded for dry runs as well so callers can see the planned renames
        renames[old_name] = new_name

    return renames

101 

102 

def migrate_legacy_metadata(legacy_metadata: Dict[str, Any], plate_root: Path, dry_run: bool = False) -> Dict[str, Any]:
    """
    Migrate legacy flat metadata format to new subdirectory-keyed format.

    The result contains a single entry under 'subdirectories'. Its key is the
    cleaned name of the first renamed '.zarr' directory, or otherwise the
    first of 'images', 'data', 'raw' that exists in plate_root ('images' if
    none exists). Image paths are rewritten to '<subdirectory>/<filename>'.

    Args:
        legacy_metadata: Legacy metadata dictionary
        plate_root: Root directory of the plate
        dry_run: If True, '.zarr' directory renames are only simulated

    Returns:
        Migrated metadata in new format
    """
    # Local import: the module only imports Path at file level
    from pathlib import PureWindowsPath

    # Step 1: Rename any .zarr directories to clean names
    renames = _rename_zarr_directories(plate_root, dry_run)

    # Step 2: Determine subdirectory and backend from renames or find data directories
    has_zarr = bool(renames)  # If we renamed .zarr directories, this is zarr storage

    if renames:
        # Use the first renamed directory as the subdirectory
        sub_dir = next(iter(renames.values()))
    else:
        # Look for existing data directories, falling back to "images"
        sub_dir = next(
            (name for name in ("images", "data", "raw") if (plate_root / name).exists()),
            "images",
        )

    # Step 3: Build relative paths using the subdirectory.
    # `or []` tolerates an explicit null in the legacy file. PureWindowsPath
    # splits on both '/' and '\\', so the filename is extracted correctly no
    # matter which OS wrote the legacy absolute paths.
    image_files = legacy_metadata.get("image_files") or []
    relative_files = [
        f"{sub_dir}/{PureWindowsPath(legacy_path_str).name}"
        for legacy_path_str in image_files
    ]

    # Create new subdirectory-keyed structure
    migrated_metadata = {
        "subdirectories": {
            sub_dir: {
                "microscope_handler_name": legacy_metadata.get("microscope_handler_name"),
                "source_filename_parser_name": legacy_metadata.get("source_filename_parser_name"),
                "grid_dimensions": legacy_metadata.get("grid_dimensions"),
                "pixel_size": legacy_metadata.get("pixel_size"),
                "image_files": relative_files,
                "channels": legacy_metadata.get("channels"),
                "wells": legacy_metadata.get("wells"),
                "sites": legacy_metadata.get("sites"),
                "z_indexes": legacy_metadata.get("z_indexes"),
                "available_backends": {"zarr": True} if has_zarr else {"disk": True},
            }
        }
    }

    return migrated_metadata

164 

165 

def migrate_plate_metadata(plate_dir: Path, dry_run: bool = False, backup_suffix: str = ".backup") -> bool:
    """
    Migrate metadata file in a plate directory.

    The original file is moved aside to '<name><backup_suffix>' before the
    migrated metadata is written; if writing fails, the backup is moved back
    over whatever was partially written.

    Args:
        plate_dir: Path to plate directory (a plain string is accepted too)
        dry_run: If True, only show what would be done
        backup_suffix: Suffix for backup file

    Returns:
        True if migration was needed and successful (or, with dry_run, would
        be performed), False otherwise. False is returned both on failure and
        when the file needs no migration.
    """
    plate_dir = Path(plate_dir)
    metadata_file = plate_dir / METADATA_FILENAME

    if not metadata_file.exists():
        logger.error(f"Metadata file not found: {metadata_file}")
        return False

    # Load existing metadata (explicit encoding: the platform default is not
    # UTF-8 everywhere)
    try:
        with open(metadata_file, 'r', encoding='utf-8') as f:
            metadata_dict = json.load(f)
    except (json.JSONDecodeError, UnicodeDecodeError, OSError) as e:
        logger.error(f"Failed to load metadata from {metadata_file}: {e}")
        return False

    # Valid JSON whose top level is not an object cannot be migrated
    if not isinstance(metadata_dict, dict):
        logger.error(f"Failed to load metadata from {metadata_file}: top-level JSON value is not an object")
        return False

    # Check if migration is needed
    if not detect_legacy_format(metadata_dict):
        logger.info(f"Metadata file {metadata_file} is already in new format - no migration needed")
        return False

    logger.info(f"Legacy format detected in {metadata_file}")

    # Perform migration.
    # NOTE: unless dry_run is set, this step already renames '.zarr'
    # directories on disk, i.e. before the backup below is taken.
    try:
        migrated_metadata = migrate_legacy_metadata(metadata_dict, plate_dir, dry_run)
    except Exception as e:
        logger.error(f"Failed to migrate metadata: {e}")
        return False

    if dry_run:
        logger.info(f"DRY RUN: Would migrate {metadata_file}")
        logger.info(f"DRY RUN: Would create backup {metadata_file}{backup_suffix}")
        logger.info(f"DRY RUN: Migrated metadata would have {len(migrated_metadata['subdirectories'])} subdirectories")
        return True

    # Create backup by moving the original file aside
    backup_file = metadata_file.with_suffix(f"{metadata_file.suffix}{backup_suffix}")
    try:
        metadata_file.rename(backup_file)
        logger.info(f"Created backup: {backup_file}")
    except OSError as e:
        logger.error(f"Failed to create backup: {e}")
        return False

    # Write migrated metadata
    try:
        with open(metadata_file, 'w', encoding='utf-8') as f:
            json.dump(migrated_metadata, f, indent=2)
    except OSError as e:
        logger.error(f"Failed to write migrated metadata: {e}")
        # Restore backup. replace() overwrites a partially written target on
        # every platform, whereas rename() fails on Windows if it exists.
        try:
            backup_file.replace(metadata_file)
            logger.info("Restored original file from backup")
        except OSError:
            logger.error(f"Failed to restore backup - original file is at {backup_file}")
        return False

    logger.info(f"Successfully migrated metadata file: {metadata_file}")
    return True

236 

237 

def main():
    """
    Command-line entry point.

    Parses the plate directory and options, runs migrate_plate_metadata and
    terminates the process with exit code 0 when it returns True and 1
    otherwise (including when the directory is missing or not a directory).
    """
    parser = argparse.ArgumentParser(description="Migrate OpenHCS legacy metadata files")
    parser.add_argument("plate_directory", type=Path, help="Path to plate directory containing openhcs_metadata.json")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be done without making changes")
    parser.add_argument("--backup-suffix", default=".backup", help="Suffix for backup files (default: .backup)")

    args = parser.parse_args()

    # Without a configured handler only WARNING and above reach the console,
    # which would hide all progress and DRY RUN messages. basicConfig() is a
    # no-op if the root logger already has handlers.
    logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")

    plate_dir = args.plate_directory

    if not plate_dir.exists():
        logger.error(f"Plate directory does not exist: {plate_dir}")
        sys.exit(1)

    if not plate_dir.is_dir():
        logger.error(f"Path is not a directory: {plate_dir}")
        sys.exit(1)

    success = migrate_plate_metadata(plate_dir, args.dry_run, args.backup_suffix)

    if success:
        if args.dry_run:
            logger.info("Dry run completed - no changes made")
        else:
            logger.info("Migration completed successfully")
        sys.exit(0)
    else:
        # migrate_plate_metadata also returns False when the file is already
        # in the new format, so this branch covers "nothing to do" as well.
        logger.error("Migration failed")
        sys.exit(1)

267 

268 

# Run the command-line entry point only when executed as a script
# (e.g. via `python -m openhcs.io.metadata_migration`), not on import
if __name__ == "__main__":
    main()