Coverage for openhcs/io/metadata_migration.py: 9.2%

117 statements  


1""" 

2OpenHCS Legacy Metadata Migration Utilities 

3 

4This module provides utilities to migrate old OpenHCS metadata files from the flat format 

5with absolute paths to the new subdirectory-keyed format with relative paths. 

6 

7The migration handles: 

8- Converting flat metadata structure to subdirectory-keyed format 

9- Converting absolute paths to relative paths 

10- Renaming .zarr directories to clean names 

11- Detecting and preserving backend information (disk vs zarr) 

12- Creating atomic backups during migration 

13 

14Usage as module: 

15 from openhcs.io.metadata_migration import migrate_plate_metadata, detect_legacy_format 

16 

17 # Check if migration is needed 

18 if detect_legacy_format(metadata_dict): 

19 success = migrate_plate_metadata(plate_dir) 

20 

21Usage as script: 

22 python -m openhcs.io.metadata_migration /path/to/plate/directory 

23 python -m openhcs.io.metadata_migration /path/to/plate/directory --dry-run 

24""" 

import argparse
import json
import logging
import sys
from pathlib import Path
from typing import Dict, Any

from .metadata_writer import METADATA_CONFIG

logger = logging.getLogger(__name__)

# Use the centralized metadata filename constant
METADATA_FILENAME = METADATA_CONFIG.METADATA_FILENAME


def detect_legacy_format(metadata_dict: Dict[str, Any]) -> bool:
    """
    Detect if metadata is in legacy format.

    Legacy format characteristics:
    - No 'subdirectories' key
    - 'image_files' contains absolute paths

    Args:
        metadata_dict: Loaded metadata dictionary

    Returns:
        True if legacy format detected, False otherwise
    """
    # New format has subdirectories key
    if "subdirectories" in metadata_dict:
        return False

    # Check if image_files contains absolute paths
    image_files = metadata_dict.get("image_files", [])
    if image_files and isinstance(image_files[0], str):
        # If first file path is absolute, assume legacy format
        return Path(image_files[0]).is_absolute()

    return False
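
# For example (hypothetical paths), {"image_files": ["/abs/path/A01.tif"]} is reported as
# legacy, while {"image_files": ["images/A01.tif"]} or any dictionary containing a
# "subdirectories" key is not.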


def _rename_zarr_directories(plate_root: Path, dry_run: bool = False) -> Dict[str, str]:
    """
    Rename any directories containing '.zarr' in their name to remove the suffix.

    Args:
        plate_root: Root directory of the plate
        dry_run: If True, only simulate the renames

    Returns:
        Dictionary mapping old names to new names
    """
    renames = {}

    for item in plate_root.iterdir():
        if item.is_dir() and '.zarr' in item.name:
            old_name = item.name
            new_name = old_name.replace('.zarr', '')
            new_path = plate_root / new_name

            # Only rename if target doesn't already exist
            if not new_path.exists():
                if dry_run:
                    logger.info(f"DRY RUN: Would rename directory: {old_name} -> {new_name}")
                else:
                    logger.info(f"Renaming directory: {old_name} -> {new_name}")
                    item.rename(new_path)
                renames[old_name] = new_name
            else:
                logger.warning(f"Cannot rename {old_name} to {new_name}: target already exists")

    return renames
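
# A dry run over a hypothetical plate containing an "images.zarr" directory would return
# {"images.zarr": "images"} without touching the filesystem; a real run also performs the rename.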


def migrate_legacy_metadata(legacy_metadata: Dict[str, Any], plate_root: Path, dry_run: bool = False) -> Dict[str, Any]:
    """
    Migrate legacy flat metadata format to the new subdirectory-keyed format.

    Args:
        legacy_metadata: Legacy metadata dictionary
        plate_root: Root directory of the plate
        dry_run: If True, only simulate any directory renames

    Returns:
        Migrated metadata in new format
    """
    # Step 1: Rename any .zarr directories to clean names
    renames = _rename_zarr_directories(plate_root, dry_run)

    # Step 2: Determine subdirectory and backend from renames or find data directories
    has_zarr = bool(renames)  # If we renamed .zarr directories, this is zarr storage

    if renames:
        # Use the first renamed directory as the subdirectory
        sub_dir = next(iter(renames.values()))
    else:
        # Look for existing data directories
        potential_dirs = ["images", "data", "raw"]
        sub_dir = None
        for potential_dir in potential_dirs:
            if (plate_root / potential_dir).exists():
                sub_dir = potential_dir
                break
        if sub_dir is None:
            sub_dir = "images"  # Default fallback

    # Step 3: Build relative paths using the subdirectory
    image_files = legacy_metadata.get("image_files", [])
    relative_files = []

    for legacy_path_str in image_files:
        # Extract filename from legacy path
        filename = Path(legacy_path_str).name
        # Create relative path with subdirectory prefix
        relative_files.append(f"{sub_dir}/{filename}")

    # Create new subdirectory-keyed structure
    migrated_metadata = {
        "subdirectories": {
            sub_dir: {
                "microscope_handler_name": legacy_metadata.get("microscope_handler_name"),
                "source_filename_parser_name": legacy_metadata.get("source_filename_parser_name"),
                "grid_dimensions": legacy_metadata.get("grid_dimensions"),
                "pixel_size": legacy_metadata.get("pixel_size"),
                "image_files": relative_files,
                "channels": legacy_metadata.get("channels"),
                "wells": legacy_metadata.get("wells"),
                "sites": legacy_metadata.get("sites"),
                "z_indexes": legacy_metadata.get("z_indexes"),
                "available_backends": {"zarr": True} if has_zarr else {"disk": True}
            }
        }
    }

    return migrated_metadata


def migrate_plate_metadata(plate_dir: Path, dry_run: bool = False, backup_suffix: str = ".backup") -> bool:
    """
    Migrate metadata file in a plate directory.

    Args:
        plate_dir: Path to plate directory
        dry_run: If True, only show what would be done
        backup_suffix: Suffix for backup file

    Returns:
        True if migration was needed and successful, False otherwise
    """
    metadata_file = plate_dir / METADATA_FILENAME

    if not metadata_file.exists():
        logger.error(f"Metadata file not found: {metadata_file}")
        return False

    # Load existing metadata
    try:
        with open(metadata_file, 'r') as f:
            metadata_dict = json.load(f)
    except (json.JSONDecodeError, IOError) as e:
        logger.error(f"Failed to load metadata from {metadata_file}: {e}")
        return False

    # Check if migration is needed
    if not detect_legacy_format(metadata_dict):
        logger.info(f"Metadata file {metadata_file} is already in new format - no migration needed")
        return False

    logger.info(f"Legacy format detected in {metadata_file}")

    # Perform migration
    try:
        migrated_metadata = migrate_legacy_metadata(metadata_dict, plate_dir, dry_run)
    except Exception as e:
        logger.error(f"Failed to migrate metadata: {e}")
        return False

    if dry_run:
        logger.info(f"DRY RUN: Would migrate {metadata_file}")
        logger.info(f"DRY RUN: Would create backup {metadata_file}{backup_suffix}")
        logger.info(f"DRY RUN: Migrated metadata would have {len(migrated_metadata['subdirectories'])} subdirectories")
        return True

    # Create backup
    backup_file = metadata_file.with_suffix(f"{metadata_file.suffix}{backup_suffix}")
    try:
        metadata_file.rename(backup_file)
        logger.info(f"Created backup: {backup_file}")
    except OSError as e:
        logger.error(f"Failed to create backup: {e}")
        return False

    # Write migrated metadata
    try:
        with open(metadata_file, 'w') as f:
            json.dump(migrated_metadata, f, indent=2)
        logger.info(f"Successfully migrated metadata file: {metadata_file}")
        return True
    except IOError as e:
        logger.error(f"Failed to write migrated metadata: {e}")
        # Restore backup
        try:
            backup_file.rename(metadata_file)
            logger.info("Restored original file from backup")
        except OSError:
            logger.error(f"Failed to restore backup - original file is at {backup_file}")
        return False
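
# Backup naming sketch (assuming METADATA_FILENAME is "openhcs_metadata.json", as the CLI help
# below suggests; this is an inference from the code, not a guarantee):
#   Path("openhcs_metadata.json").with_suffix(".json" + ".backup")
#   -> Path("openhcs_metadata.json.backup")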


def main():
    parser = argparse.ArgumentParser(description="Migrate OpenHCS legacy metadata files")
    parser.add_argument("plate_directory", type=Path, help="Path to plate directory containing openhcs_metadata.json")
    parser.add_argument("--dry-run", action="store_true", help="Show what would be done without making changes")
    parser.add_argument("--backup-suffix", default=".backup", help="Suffix for backup files (default: .backup)")

    args = parser.parse_args()

    plate_dir = args.plate_directory

    if not plate_dir.exists():
        logger.error(f"Plate directory does not exist: {plate_dir}")
        sys.exit(1)

    if not plate_dir.is_dir():
        logger.error(f"Path is not a directory: {plate_dir}")
        sys.exit(1)

    success = migrate_plate_metadata(plate_dir, args.dry_run, args.backup_suffix)

    if success:
        if args.dry_run:
            logger.info("Dry run completed - no changes made")
        else:
            logger.info("Migration completed successfully")
        sys.exit(0)
    else:
        logger.error("Migration failed")
        sys.exit(1)


if __name__ == "__main__":
    main()