Coverage for lynceus/files/file_factory.py: 87%

162 statements  

« prev     ^ index     » next       coverage.py v7.10.0, created at 2025-07-29 08:46 +0000

1import inspect 

2from pathlib import Path 

3 

4from lynceus.core.config import ( 

5 CONFIG_GENERAL_KEY, 

6 CONFIG_STORAGE_DYNAMIC_TYPE, 

7 CONFIG_STORAGE_IS_DYNAMIC, 

8 CONFIG_STORAGE_REMOTE_TYPE, 

9) 

10from lynceus.core.config.lynceus_config import LynceusConfig 

11from lynceus.core.exchange.lynceus_exchange import LynceusExchange 

12from lynceus.core.lynceus import LynceusSession 

13from lynceus.core.lynceus_client import LynceusClientClass 

14from lynceus.files.lynceus_file import ( 

15 LynceusFile, 

16 _LocalLynceusFile, 

17 _RemoteS3LynceusFile, 

18) 

19from lynceus.files.remote.s3 import S3Utils 

20from lynceus.lynceus_exceptions import LynceusConfigError, LynceusFileError 

21from lynceus.utils import lookup_root_path 

22from tests.storage_metadata import StorageMetadataBase 

23 

24 

25# pylint: disable=too-many-instance-attributes 

26class LynceusFileFactory(LynceusClientClass): 

27 """ 

28 LynceusFileFactory is useful to create instance of Local or Remote according to various configuration provided once 

29 for all in Factory constructor. 

30 

31 By default, it is configured to read and write parquet files, but it can be configured to manage any kind of file. 

32 """ 

33 

34 REMOTE_STORAGE_TYPE_S3: str = "s3" 

35 REMOTE_STORAGE_TYPE_SUPPORTED_LIST: set[str] = {REMOTE_STORAGE_TYPE_S3} 

36 

37 # pylint: disable=too-many-branches,too-many-statements 

38 def __init__( 

39 self, 

40 *, 

41 lynceus_session: LynceusSession, 

42 lynceus_exchange: LynceusExchange | None, 

43 lynceus_config: LynceusConfig = None, 

44 name: str | None = None, 

45 env: str | None = None, 

46 env_suffix: str | None = None, 

47 remote_config_section: str | None = None, 

48 remote_config_key: str | None = None, 

49 remote_mode_forced_by_cli: bool = True, 

50 remote_mode_automatic_activation: bool = False, 

51 source_path_format: str = "{target}/{env}/parquet", 

52 source_mode: bool = False, 

53 dest_path_format: str = "{dest_file_name}/{source_name}.parquet", 

54 dest_path_kwargs: dict[str, str] | None = None, 

55 remote_dynamic_type_class_map: dict[str, type[StorageMetadataBase]] | None = None, 

56 dynamic_storage_mandatory_param_map: dict[str, bool] | None = None, 

57 ): 

58 """ 

59 Initialize Lynceus file Factory generating File allowing local or remote management. 

60 

61 Factory is configured according to remote mode toggle (depending on specified argument 

62 and optional overriding configuration). 

63 

64 Parameters 

65 ---------- 

66 lynceus_session : LynceusSession 

67 Active Lynceus session 

68 lynceus_exchange : LynceusExchange, optional 

69 Exchange instance for communication 

70 lynceus_config : LynceusConfig, optional 

71 Configuration to use (if not specified, configuration of specified lynceus session is used) 

72 name : str, optional 

73 Name identifier for the factory 

74 env : str, optional 

75 Name of the environment which will be used as parent directory of parquet files (for write access) 

76 env_suffix : str, optional 

77 Suffix to append to environment name 

78 remote_config_section : str, optional 

79 Config section containing remote storage configuration 

80 remote_config_key : str, optional 

81 Config key, in General section, giving config section (needed only if remote_config_section is not defined) 

82 remote_mode_forced_by_cli : bool, default True 

83 (forced by CLI) True to read remotely, False to read locally 

84 remote_mode_automatic_activation : bool, default False 

85 Automatic activation requested 

86 source_path_format : str, default "{target}/{env}/parquet" 

87 The format used to generate source path 

88 source_mode : bool, default False 

89 Whether this factory is used in source mode 

90 dest_path_format : str, default "{dest_file_name}/{source_name}.parquet" 

91 The format used to generate destination path 

92 dest_path_kwargs : dict, optional 

93 The parameters used to generate destination path 

94 remote_dynamic_type_class_map : dict, optional 

95 Mapping of dynamic type names to StorageMetadataBase classes 

96 dynamic_storage_mandatory_param_map : dict, optional 

97 Mapping of parameter names to mandatory flags for dynamic storage 

98 

99 Notes 

100 ----- 

101 remote_mode explanation: 

102 - by default destination files are read and write locally, 

103 - if Host is our Data Factory, 

104 or if NB_USER environment variable is defined to jovyan, indicating system is launched with Docker image, 

105 remote_mode automatic activation is requested 

106 - but the **override_to_local_mode** configuration (in configuration file), allow ignoring automatic activation request 

107 - in any case, the remote_mode_forced_by_cli toggle (e.g. set by a --remote-mode CLI option) can be used to force remote mode 

108 (it will be False here, BUT 100% of request will use the override_remote_mode method parameter). 

109 => this system could be lightened, but it is risky to do that while keeping backward compatibility. 

110 """ 

111 super().__init__( 

112 logger_name="file", 

113 lynceus_session=lynceus_session, 

114 lynceus_exchange=lynceus_exchange, 

115 ) 

116 

117 # Safe-guard: 

118 if not source_path_format: 

119 raise LynceusFileError("Source path format must be defined!") 

120 

121 self.__env: str = env 

122 self.__name: str = name or remote_config_section 

123 self.__remote_mode: bool = remote_mode_forced_by_cli 

124 self.__remote_mode_automatic_activation: bool = remote_mode_automatic_activation 

125 self.__local_mode_forced_by_config: bool = False 

126 self.__source_path_format: str = source_path_format 

127 self.__source_mode = source_mode 

128 self.__dest_path_format: str = dest_path_format 

129 self.__dest_path_kwargs: dict[str, str] = dest_path_kwargs or {} 

130 self.__previous_dest_path_kwargs = {} 

131 self.__remote_dynamic_type_class_map = remote_dynamic_type_class_map or {} 

132 self.__dynamic_storage_mandatory_param_map = dynamic_storage_mandatory_param_map or {} 

133 

134 if not lynceus_config: 

135 lynceus_config = self._lynceus_session.get_lynceus_config_copy() 

136 

137 # Loads remote configuration, and define some variable accordingly. 

138 self.__dynamic_container_name_params: dict[str, str] = {} 

139 if not remote_config_section and not remote_config_key: 

140 self._logger.warning( 

141 "No remote configuration given at all, this file factory will only be able to create Local file." 

142 ) 

143 self.__remote_config = None 

144 self.__with_dynamic_container_name: bool = False 

145 else: 

146 if not remote_config_section: 

147 remote_config_section = lynceus_config.get_config(CONFIG_GENERAL_KEY, remote_config_key) 

148 self.__remote_config = lynceus_config[remote_config_section] 

149 if not self.__remote_config: 

150 raise ValueError( 

151 f'There is no "{remote_config_section}" configuration in configuration file (are you sure you load the storage definition file ?).' 

152 ) 

153 

154 self._logger.debug( 

155 f'According to "{remote_config_section}" configuration section, LynceusFileFactory will' 

156 + f' consider remote configuration named "{remote_config_section}": "{LynceusConfig.format_config(self.__remote_config)}".' 

157 ) 

158 

159 if self.__remote_config.get(CONFIG_STORAGE_REMOTE_TYPE) not in LynceusFileFactory.REMOTE_STORAGE_TYPE_SUPPORTED_LIST: 

160 raise NotImplementedError( 

161 f'Configured "{CONFIG_STORAGE_REMOTE_TYPE}={self.__remote_config.get(CONFIG_STORAGE_REMOTE_TYPE)}" is not supported.' 

162 + f" Supported values: {LynceusFileFactory.REMOTE_STORAGE_TYPE_SUPPORTED_LIST}" 

163 ) 

164 

165 # Retrieves optional is_dynamic configuration option. 

166 self.__with_dynamic_container_name: bool = LynceusConfig.to_bool(self.__remote_config.get(CONFIG_STORAGE_IS_DYNAMIC)) 

167 self.__with_dynamic_container_type: str = self.__remote_config.get(CONFIG_STORAGE_DYNAMIC_TYPE) 

168 

169 remote_dynamic_type_supported_list: set[str] = set(self.__remote_dynamic_type_class_map.keys()) | {None} 

170 if self.__with_dynamic_container_type not in remote_dynamic_type_supported_list: 

171 raise NotImplementedError( 

172 f'Configured "{CONFIG_STORAGE_DYNAMIC_TYPE}={self.__with_dynamic_container_type}" is not supported.' 

173 + f" Supported values: {remote_dynamic_type_supported_list}" 

174 ) 

175 

176 # Initializes some utilities. 

177 self.__s3utils = S3Utils( 

178 lynceus_session=lynceus_session, 

179 lynceus_exchange=lynceus_exchange, 

180 lynceus_s3_config=self.__remote_config, 

181 ) 

182 self.__s3utils.initialize() 

183 

184 # Checks if local mode (against remote mode) is forced in configuration file. 

185 if "override_to_local_mode" in self.__remote_config: 

186 if LynceusConfig.to_bool(self.__remote_config["override_to_local_mode"]): 

187 self.__local_mode_forced_by_config = True 

188 # It is the case so defined the remote mode as False. 

189 self.__remote_mode = False 

190 # pylint: disable=logging-not-lazy 

191 self._logger.info( 

192 f'According to "override_to_local_mode" configuration in "{remote_config_section}"' 

193 + ' remote mode is overridden to "local" (it can only be overridden by CLI option).' 

194 ) 

195 else: 

196 # In any case, set the remote mode as the value of CLI **or** auto activation. 

197 self.__remote_mode |= remote_mode_automatic_activation 

198 self._logger.info( 

199 f'remote mode="{self.__remote_mode}" (forced by CLI option="{remote_mode_forced_by_cli}";' 

200 + f' automatic activation according to environment="{self.__remote_mode_automatic_activation}").' 

201 ) 

202 

203 if "override_environment" in self.__remote_config: 

204 self.__env = self.__remote_config["override_environment"] 

205 self.__env = self.__define_complete_env(self.__env, env_suffix) 

206 self._logger.info( 

207 f'According to "override_environment" configuration in "{remote_config_section}"' 

208 + f' Environnment is overriden to "{self.__env}".' 

209 ) 

210 

211 # Defines remote root path, once for all. 

212 self.__remote_root_path: Path | None = None 

213 if not self.__with_dynamic_container_name: 

214 self.__remote_root_path = self.__define_remote_root_path(self.__env) 

215 

216 # Defines default environment if needed. 

217 if self.__env is None: 

218 self.__env = self.__define_complete_env("dev", env_suffix) 

219 self._logger.info( 

220 f'No environment defined in CLI or configuration, defined it to "{self.__env}".' 

221 ) 

222 

223 # Defines local root path, once for all. 

224 self.__local_root_path: Path = self.__define_local_root_path(self.__env) 

225 

226 # Defines string presentation of this LynceusFile Factory. 

227 self.__string_presentation = LynceusConfig.format_dict_to_string( 

228 LynceusConfig.format_config( 

229 self.get_context_info() 

230 | { 

231 "env": self.__env, 

232 "source_path_format": self.__source_path_format, 

233 "source_mode": self.__source_mode, 

234 "dest_path_format": self.__dest_path_format, 

235 "storage": self.__remote_config or "Local only", 

236 "dynamic": self.__dynamic_container_name_params, 

237 } 

238 ), 

239 indentation_level=2, 

240 ) 

241 

242 @property 

243 def name(self): 

244 """ 

245 Get the name of this file factory. 

246 

247 Returns 

248 ------- 

249 str 

250 The factory name 

251 """ 

252 return self.__name 

253 

254 @property 

255 def is_dynamic_remote(self): 

256 """ 

257 Check if this factory uses dynamic remote container naming. 

258 

259 Returns 

260 ------- 

261 bool 

262 True if using dynamic container names, False otherwise 

263 """ 

264 return self.__with_dynamic_container_name 

265 

266 def __define_complete_env(self, env: str, env_suffix: str): 

267 """ 

268 Build the complete environment name including optional suffix. 

269 

270 For source mode, suffix is ignored. For target mode, suffix is appended 

271 to create a hierarchical environment structure. 

272 

273 Parameters 

274 ---------- 

275 env : str 

276 Base environment name 

277 env_suffix : str 

278 Optional suffix to append (ignored in source mode) 

279 

280 Returns 

281 ------- 

282 str 

283 Complete environment name 

284 """ 

285 # Checks if this factory is used as a source. 

286 if self.__source_mode: 

287 # It is the case, so suffix is NOT used here. 

288 return env 

289 

290 # It is used as a target, so environment suffix must be taken care. 

291 return env if not env_suffix else f"{env}/{env_suffix}" 

292 

293 def force_cache_refresh(self): 

294 """ 

295 Force refresh of the remote storage cache. 

296 

297 Invalidate any cached information about remote files to ensure 

298 fresh data is retrieved on next access. 

299 """ 

300 if self.__remote_config: 

301 self.__s3utils.force_cache_refresh() 

302 

303 def get_env(self) -> str: 

304 """ 

305 Get the current environment name. 

306 

307 Returns 

308 ------- 

309 str 

310 The environment name used by this factory 

311 """ 

312 return self.__env 

313 

314 def __build_relative_path_dir(self, target: str, env: str | None): 

315 """ 

316 Build relative directory path using the configured format. 

317 

318 Use the source path format template with provided parameters 

319 to generate the relative directory structure. 

320 

321 Parameters 

322 ---------- 

323 target : str 

324 Target identifier for the path 

325 env : str, optional 

326 Environment name (can be None) 

327 

328 Returns 

329 ------- 

330 str 

331 Formatted relative directory path 

332 """ 

333 return self.__source_path_format.format( 

334 **self.__dest_path_kwargs, target=target, env=env 

335 ) 

336 

337 def __define_local_root_path(self, env: str | None) -> Path: 

338 """ 

339 Define the local root path for file operations. 

340 

341 Construct the local filesystem root path based on the project 

342 structure and environment configuration. 

343 

344 Parameters 

345 ---------- 

346 env : str, optional 

347 Environment name for path building 

348 

349 Returns 

350 ------- 

351 Path 

352 Local root path for file operations 

353 """ 

354 root_path: Path = lookup_root_path( 

355 "lynceus/misc", root_path=Path(__file__).parent 

356 ) 

357 return root_path / Path(self.__build_relative_path_dir("target", env)) 

358 

359 def update_dynamic_storage_params(self, params: dict[str, str | int]): 

360 """ 

361 Update parameters for dynamic storage container naming. 

362 

363 Update the parameters used to generate dynamic container names 

364 and validate that all mandatory parameters are provided. 

365 

366 Parameters 

367 ---------- 

368 params : dict 

369 Dictionary of parameter names to values 

370 

371 Raises 

372 ------ 

373 LynceusFileError 

374 If mandatory parameters are missing 

375 """ 

376 self.__dynamic_container_name_params |= params.copy() 

377 mandatory_params: set[str] = { 

378 param 

379 for param, is_mandatory in self.__dynamic_storage_mandatory_param_map.items() 

380 if is_mandatory 

381 } 

382 

383 # Ensures there are all the mandatory params. 

384 if mandatory_params - set(self.__dynamic_container_name_params.keys()): 

385 raise LynceusFileError( 

386 f"Specified dynamic storage params ({set(self.__dynamic_container_name_params.keys())})," 

387 + f" should contain at least all the awaited ones ({mandatory_params})." 

388 ) 

389 

390 def __define_remote_container_name(self) -> str: 

391 """ 

392 Define the remote storage container name. 

393 

394 For static storage, return the configured bucket name. 

395 For dynamic storage, instantiate the appropriate metadata class 

396 and generate a unique storage name. 

397 

398 Returns 

399 ------- 

400 str 

401 Container name for remote storage 

402 

403 Raises 

404 ------ 

405 LynceusConfigError 

406 If dynamic container type is not found 

407 LynceusFileError 

408 If unable to create the storage metadata instance 

409 """ 

410 # Checks if it is a static or dynamic storage. 

411 if not self.__with_dynamic_container_name: 

412 return self.__remote_config["bucket_name"] 

413 

414 # Safe-guard: Checks if it is a static or dynamic storage. 

415 if self.__with_dynamic_container_type not in self.__remote_dynamic_type_class_map: 

416 raise LynceusConfigError( 

417 f"Unable to find a dynamic container with type {self.__with_dynamic_container_type} in your configuration." 

418 ) 

419 

420 # Defines which StorageMetadata and params according to configuration. 

421 storage_metadata_class: type[StorageMetadataBase] = self.__remote_dynamic_type_class_map.get(self.__with_dynamic_container_type) 

422 awaited_params = set(inspect.getfullargspec(storage_metadata_class).args) - {"self"} 

423 

424 # Creates the corresponding StorageMetadata, and requests the unique storage name building to be 100% sure 

425 # it will be the exact same name used during creation, and during compute resources request. 

426 try: 

427 # Filters parameters to use to instantiate such StorageMetadata class. 

428 params = { 

429 key: value 

430 for key, value in self.__dynamic_container_name_params.items() 

431 if key in awaited_params 

432 } 

433 

434 dynamic_storage: StorageMetadataBase = storage_metadata_class(**params) 

435 return dynamic_storage.build_unique_storage_name() 

436 except TypeError as exc: 

437 raise LynceusFileError( 

438 f'Unable to define the name of the dynamic remote container "{self}".', 

439 exc, 

440 ) from exc 

441 

442 def __define_remote_root_path(self, env: str | None) -> Path: 

443 """ 

444 Define the remote root path for file operations. 

445 

446 Construct the S3 root path using the container name and environment. 

447 

448 Parameters 

449 ---------- 

450 env : str, optional 

451 Environment name for path building 

452 

453 Returns 

454 ------- 

455 Path 

456 Remote root path with S3 prefix 

457 """ 

458 relative_path_dir: str = self.__build_relative_path_dir( 

459 self.__define_remote_container_name(), env 

460 ) 

461 

462 return Path(f"{LynceusFile.S3_PATH_BEGIN}{relative_path_dir}/") 

463 

464 # pylint: disable=too-many-positional-arguments 

465 def new_file( 

466 self, 

467 source_name: str | None, 

468 source_file_name: Path | str, 

469 must_exist: bool = True, 

470 override_env: str = None, 

471 override_remote_mode: bool = None, 

472 create_sub_directories: bool = True, 

473 dest_path_format: str = None, 

474 override_dest_path_kwargs: dict = None, 

475 specific_dest_file_name: str = None, 

476 ) -> LynceusFile: 

477 """ 

478 Create a new LynceusFile instance with the specified parameters. 

479 

480 Build file paths using the configured format and create appropriate 

481 local or remote file instances based on the mode settings. 

482 

483 Parameters 

484 ---------- 

485 source_name : str, optional 

486 Name of the source (used in path formatting) 

487 source_file_name : str or Path 

488 Base filename or path 

489 must_exist : bool, default True 

490 Whether the file must exist (raises error if not) 

491 override_env : str, optional 

492 Environment override for this file 

493 override_remote_mode : bool, optional 

494 Override the default remote mode setting 

495 create_sub_directories : bool, default True 

496 Whether to create parent directories 

497 dest_path_format : str, optional 

498 Custom path format (overrides default) 

499 override_dest_path_kwargs : dict, optional 

500 Custom path formatting parameters 

501 specific_dest_file_name : str, optional 

502 Override the destination filename 

503 

504 Returns 

505 ------- 

506 LynceusFile 

507 Configured file instance (local or remote) 

508 

509 Raises 

510 ------ 

511 KeyError 

512 If path formatting fails due to missing parameters 

513 """ 

514 # Safe-guard: ensures source_file_name is defined to minimum. 

515 source_file_name: str = str(source_file_name) or "/" 

516 

517 # Defines destination file name, which is the same as the source file name by default. 

518 dest_file_name = ( 

519 specific_dest_file_name if specific_dest_file_name else source_file_name 

520 ) 

521 

522 # Manages path kwargs overriding if needed. 

523 path_kwargs = ( 

524 override_dest_path_kwargs 

525 if override_dest_path_kwargs 

526 else self.__dest_path_kwargs 

527 ) 

528 

529 # Special Hack (mainly needed for CustomerInfo auto merge system), using previous path kwargs if none is defined here. 

530 if not path_kwargs: 

531 path_kwargs = self.__previous_dest_path_kwargs 

532 else: 

533 # Registers path_kwargs for next potential iteration. 

534 self.__previous_dest_path_kwargs = path_kwargs 

535 

536 # Formats the new file path. 

537 if not dest_path_format: 

538 dest_path_format = self.__dest_path_format 

539 

540 try: 

541 new_file_path: str = dest_path_format.format( 

542 **path_kwargs, source_name=source_name, dest_file_name=dest_file_name 

543 ) 

544 except KeyError: 

545 # Gives as much information as possible. 

546 self._logger.error( 

547 f'Unable to build the file path from format "{dest_path_format}" and arguments: "{source_name}", "{dest_file_name}", "{path_kwargs=}"' 

548 ) 

549 # Stops on error anyway. 

550 raise 

551 

552 return self._do_new_file( 

553 new_file_path, 

554 must_exist, 

555 override_env, 

556 override_remote_mode, 

557 create_sub_directories=create_sub_directories, 

558 ) 

559 

560 # pylint: disable=too-many-positional-arguments 

561 def new_env_directory( 

562 self, 

563 must_exist: bool = True, 

564 override_env: str = None, 

565 override_remote_mode: bool = None, 

566 ) -> LynceusFile: 

567 """ 

568 Create a LynceusFile instance representing an environment directory. 

569 

570 Create a file instance pointing to the environment root directory 

571 without creating subdirectories. 

572 

573 Parameters 

574 ---------- 

575 must_exist : bool, default True 

576 Whether the directory must exist 

577 override_env : str, optional 

578 Environment override 

579 override_remote_mode : bool, optional 

580 Override the default remote mode setting 

581 

582 Returns 

583 ------- 

584 LynceusFile 

585 File instance representing the environment directory 

586 """ 

587 return self._do_new_file( 

588 "", 

589 must_exist, 

590 override_env, 

591 override_remote_mode, 

592 create_sub_directories=False, 

593 ) 

594 

595 def _do_new_file( 

596 self, 

597 path: str, 

598 must_exist: bool = True, 

599 override_env: str | None = None, 

600 override_remote_mode: bool = None, 

601 create_sub_directories: bool = True, 

602 ) -> LynceusFile: 

603 """ 

604 Internal method to create a new file instance. 

605 

606 Handle the core logic for creating local or remote file instances, 

607 including path resolution, globbing for remote files, and cache management. 

608 

609 Parameters 

610 ---------- 

611 path : str 

612 Relative path within the environment 

613 must_exist : bool, default True 

614 Whether the file must exist 

615 override_env : str, optional 

616 Environment override 

617 override_remote_mode : bool, optional 

618 Override the default remote mode setting 

619 create_sub_directories : bool, default True 

620 Whether to create parent directories 

621 

622 Returns 

623 ------- 

624 LynceusFile 

625 Configured file instance 

626 """ 

627 if self.__remote_mode or override_remote_mode: 

628 root_path: Path = ( 

629 self.__remote_root_path 

630 if override_env is None and not self.__with_dynamic_container_name 

631 else self.__define_remote_root_path(override_env) 

632 ) 

633 

634 complete_path: Path = root_path 

635 

636 # Important: concatenates path only if it exists and not '/'. 

637 if path and path != "/": 

638 complete_path /= Path(path) 

639 

640 # Manages optional globbing if needed. 

641 if "*" in path: 

642 matching_files = self.__s3utils.list_remote_files( 

643 remote_root_path=_RemoteS3LynceusFile.get_raw_path_from_remote_path(root_path), 

644 recursive=True, 

645 pattern=path, 

646 detail=True, 

647 ) 

648 if not matching_files: 

649 self._logger.warning( 

650 f'Unable to find any remote files while globbing with "{complete_path}". It will certainly lead to not found file.' 

651 ) 

652 else: 

653 # Sorts by last modification date. 

654 sorted_matching_files = sorted( 

655 matching_files.items(), 

656 key=lambda kv: kv[1]["LastModified"], 

657 reverse=True, 

658 ) 

659 selected_file_path = sorted_matching_files[0][0] 

660 

661 complete_path = root_path / Path(selected_file_path) 

662 else: 

663 root_path: Path = ( 

664 self.__local_root_path 

665 if override_env is None and not self.__with_dynamic_container_name 

666 else self.__define_local_root_path(override_env) 

667 ) 

668 

669 complete_path: Path = root_path / Path(path) 

670 # TODO: implement globbing on local path, something like root_path.glob(pattern) 

671 

672 return self.create_from_full_path( 

673 complete_path, 

674 must_exist=must_exist, 

675 override_remote_mode=override_remote_mode, 

676 create_sub_directories=create_sub_directories, 

677 ) 

678 

679 def create_from_full_path( 

680 self, 

681 complete_path: Path, 

682 must_exist: bool = True, 

683 override_remote_mode: bool = None, 

684 create_sub_directories: bool = True, 

685 ) -> LynceusFile: 

686 """ 

687 Create a LynceusFile instance from a complete file path. 

688 

689 Create the appropriate local or remote file instance based on the path 

690 and factory configuration. Handle cache refresh for remote files. 

691 

692 Parameters 

693 ---------- 

694 complete_path : Path 

695 Complete file path (local or remote) 

696 must_exist : bool, default True 

697 Whether the file must exist (raises error if not) 

698 override_remote_mode : bool, optional 

699 Override the default remote mode setting 

700 create_sub_directories : bool, default True 

701 Whether to create parent directories for local files 

702 

703 Returns 

704 ------- 

705 LynceusFile 

706 File instance (local or remote) 

707 

708 Raises 

709 ------ 

710 LynceusFileError 

711 If file doesn't exist and must_exist is True 

712 """ 

713 complete_path = Path(complete_path) 

714 if self.__remote_mode or override_remote_mode: 

715 new_lynceus_file: LynceusFile = _RemoteS3LynceusFile( 

716 complete_path, 

717 self._logger, 

718 self.__s3utils.get_s3filesystem(), 

719 self.__s3utils, 

720 ) 

721 

722 # Forces cache refresh if corresponding file existence if False atm. 

723 # it can happen if the file has been created (from elsewhere) after creation of this Factory. 

724 if must_exist and not new_lynceus_file.exists(reason="check if S3fs cache must be refreshed"): 

725 self.__s3utils.force_cache_refresh(path=_RemoteS3LynceusFile.get_raw_path_from_remote_path(complete_path.parent)) 

726 else: 

727 # Creates subdirectories if needed. 

728 if create_sub_directories: 

729 complete_path.parent.mkdir(parents=True, exist_ok=True) 

730 

731 new_lynceus_file: LynceusFile = _LocalLynceusFile( 

732 complete_path, self._logger 

733 ) 

734 

735 # Safe-guard: ensures corresponding file exists. 

736 if must_exist and not new_lynceus_file.exists(): 

737 raise LynceusFileError( 

738 f'Requested file "{new_lynceus_file}" does not exist.' 

739 ) 

740 

741 return new_lynceus_file 

742 

743 def get_parent_file( 

744 self, 

745 lynceus_file: LynceusFile, 

746 must_exist: bool = True, 

747 override_remote_mode: bool = None, 

748 create_sub_directories: bool = True, 

749 ) -> LynceusFile: 

750 """ 

751 Get a LynceusFile instance for the parent directory of the given file. 

752 

753 Create a file instance representing the parent directory using 

754 the same factory configuration. 

755 

756 Parameters 

757 ---------- 

758 lynceus_file : LynceusFile 

759 File whose parent directory is needed 

760 must_exist : bool, default True 

761 Whether the parent directory must exist 

762 override_remote_mode : bool, optional 

763 Override the default remote mode setting 

764 create_sub_directories : bool, default True 

765 Whether to create parent directories 

766 

767 Returns 

768 ------- 

769 LynceusFile 

770 File instance for the parent directory 

771 """ 

772 return self.create_from_full_path( 

773 lynceus_file.get_parent_path(), 

774 must_exist=must_exist, 

775 override_remote_mode=override_remote_mode, 

776 create_sub_directories=create_sub_directories, 

777 ) 

778 

779 def get_context_info(self): 

780 """ 

781 Get context information about the factory's mode settings. 

782 

783 Return diagnostic information about how the factory was configured 

784 regarding local vs remote mode settings. 

785 

786 Returns 

787 ------- 

788 dict 

789 Context information with mode settings and their sources 

790 """ 

791 return { 

792 "remote_mode (from CLI)": self.__remote_mode, 

793 "remote_mode (automatic)": self.__remote_mode_automatic_activation, 

794 "local_mode (from Config)": self.__local_mode_forced_by_config, 

795 } 

796 

797 def __str__(self): 

798 """ 

799 Get string representation of the file factory. 

800 

801 Return formatted configuration information for debugging and logging. 

802 

803 Returns 

804 ------- 

805 str 

806 Detailed factory configuration information 

807 """ 

808 return self.__string_presentation