"""
Workflow to standardize ASDMB crystal parts.

This script goes through all parts in the "Clock - ASDMB" category and:
1. Splits the name at "/" and keeps the first part as the new name.
2. Splits the description at "/" and keeps the second part as the new
   description.
3. Flags parts that end up without a description so that an info provider
   update can be triggered for them.

Uses the PartDB API for all operations.
"""

import re
from typing import Optional, List, Tuple

from tqdm import tqdm

from config import PARTDB_BASE, PARTDB_TOKEN
from apis.partdb_api import PartDB

# Hard stop for pagination so that a server which ignores the ``page``
# parameter (and keeps returning full pages) cannot make the loop run forever.
_MAX_PAGES = 100

# Use smaller page size to match API default
_PAGE_SIZE = 30


def _parent_category_id(cat: dict) -> Optional[int]:
    """Return the numeric parent id referenced by *cat*, or None.

    The parent may be given as a dict (``id`` / ``_id`` key) or as a string
    reference; for strings the digits are concatenated and parsed.  A
    reference that cannot be turned into an integer yields None instead of
    raising, so one malformed category cannot abort the whole run.
    """
    parent = cat.get("parent")
    if not parent:
        return None

    raw = None
    if isinstance(parent, dict):
        raw = parent.get("id") or parent.get("_id")
    elif isinstance(parent, str):
        raw = parent
    if not raw:
        return None

    try:
        if isinstance(raw, str):
            return int("".join(c for c in raw if c.isdigit()))
        return int(raw)
    except (TypeError, ValueError):
        return None


def _collect_subcategory_ids(api: PartDB, categories: List[dict], root_id) -> list:
    """Return *root_id* followed by the ids of all its descendants (depth-first)."""
    collected = [root_id]

    def find_children(parent_id):
        for cat in categories:
            parent_num = _parent_category_id(cat)
            if parent_num is None or parent_num != parent_id:
                continue
            child_id = api._extract_id(cat)
            # The membership check also protects against cycles in the tree.
            if child_id and child_id not in collected:
                collected.append(child_id)
                print(f" Found subcategory: {cat.get('name')} (ID: {child_id})")
                find_children(child_id)

    find_children(root_id)
    return collected


def _part_category_id(api: PartDB, part: dict):
    """Extract the category id of *part*, or None when it cannot be determined.

    Three places are checked in order: the top-level ``category`` field
    (dict, string reference or int), ``relationships.category.data`` and
    ``attributes.category``.
    """
    part_cat = part.get("category")
    part_cat_id = None

    if isinstance(part_cat, dict):
        part_cat_id = api._extract_id(part_cat)
    elif isinstance(part_cat, str):
        try:
            if "/categories/" in part_cat:
                part_cat_id = int(part_cat.strip("/").split("/")[-1])
            else:
                part_cat_id = int("".join(c for c in part_cat if c.isdigit()))
        except ValueError:
            # Unparseable reference: fall through to the other locations.
            pass
    elif isinstance(part_cat, int):
        part_cat_id = part_cat

    # Also check relationships for category
    if part_cat_id is None:
        relationships = part.get("relationships", {})
        if relationships:
            rel_cat = relationships.get("category")
            if isinstance(rel_cat, dict):
                rel_cat_data = rel_cat.get("data", {})
                if isinstance(rel_cat_data, dict):
                    part_cat_id = api._extract_id(rel_cat_data)

    # Also check attributes
    if part_cat_id is None:
        attributes = part.get("attributes", {})
        if attributes:
            attr_cat = attributes.get("category")
            if attr_cat:
                if isinstance(attr_cat, dict):
                    part_cat_id = api._extract_id(attr_cat)
                elif isinstance(attr_cat, (int, str)):
                    try:
                        part_cat_id = int(str(attr_cat).strip("/").split("/")[-1])
                    except ValueError:
                        pass

    return part_cat_id


def _has_more_pages(meta: dict, item_count: int, per_page: int) -> bool:
    """Decide whether another page should be requested.

    Pagination metadata wins when present; otherwise a full page is taken as
    a sign that more data may follow.
    """
    if meta.get("current_page") and meta.get("last_page"):
        return meta["current_page"] < meta["last_page"]
    return item_count >= per_page


def get_all_parts_in_category(api: PartDB, category_name: str) -> List[dict]:
    """
    Get all parts in a specific category and its subcategories.

    Args:
        api: PartDB client used for all requests.
        category_name: Category name, matched case-insensitively after
            stripping surrounding whitespace.

    Returns:
        The matching part dicts as returned by the API.  The list is empty
        when the category does not exist; if fetching a page fails, the parts
        collected so far are returned.
    """
    # First, find the category
    categories = api.list_categories()
    wanted_name = category_name.lower()
    target_cat_id = None
    for cat in categories:
        name = (cat.get("name") or "").strip()
        if name.lower() == wanted_name:
            target_cat_id = api._extract_id(cat)
            break

    if not target_cat_id:
        print(f"Category '{category_name}' not found!")
        return []

    print(f"Found category '{category_name}' with ID {target_cat_id}")

    # Find all subcategories
    subcategory_ids = _collect_subcategory_ids(api, categories, target_cat_id)
    print(f"Total categories to process: {len(subcategory_ids)}")
    print(f"Category IDs: {subcategory_ids}")
    wanted_ids = set(subcategory_ids)

    # Fetch all parts with pagination and filter by category client-side
    all_parts = []
    page = 1
    per_page = _PAGE_SIZE

    print("\nFetching parts from API...")

    while True:
        params = {"per_page": per_page, "page": page}
        print(f" Fetching page {page}...")

        try:
            response = api._get("/api/parts", params=params)

            # The endpoint may answer with a bare list or with an envelope
            # carrying "data" and "meta"; normalise both to (items, meta).
            if isinstance(response, list):
                items = response
                meta = {}
                empty_message = " No parts returned, stopping"
            elif isinstance(response, dict):
                items = response.get("data") or []
                meta = response.get("meta") or {}
                empty_message = " No data returned, stopping"
            else:
                break

            if not items:
                print(empty_message)
                break

            # Filter by category
            matches_this_page = 0
            for part in items:
                part_cat_id = _part_category_id(api, part)
                if part_cat_id and part_cat_id in wanted_ids:
                    all_parts.append(part)
                    matches_this_page += 1

            print(f" Got {len(items)} parts ({matches_this_page} matches, total: {len(all_parts)})")

            if not _has_more_pages(meta, len(items), per_page):
                break

            page += 1

            # Safety check (applies to both response shapes)
            if page > _MAX_PAGES:
                print(f" Warning: Fetched {_MAX_PAGES} pages, stopping")
                break

        except Exception as e:
            # Best effort: keep what was collected so far.
            print(f" Error fetching page {page}: {e}")
            break

    print(f"\nFound {len(all_parts)} parts in category")
    return all_parts


def standardize_asdmb_part(api: PartDB, part: dict, dry_run: bool = False) -> Tuple[bool, str, bool]:
    """
    Standardize a single ASDMB crystal part.

    The name is cut at the first "/" (text before it is kept) and the
    description is cut at the first "/" (text after it is kept).  A part whose
    description is empty, or becomes empty after the cut, is flagged for a
    provider update.

    Args:
        api: PartDB client used to apply the update.
        part: Part dict as returned by the API.
        dry_run: If True, only report what would change.

    Returns:
        (success, message, needs_provider_update)
    """
    part_id = api._extract_id(part)
    if not part_id:
        return (False, "No part ID", False)

    # Get current name and description
    attributes = part.get("attributes") or {}
    current_name = part.get("name") or attributes.get("name") or ""
    current_desc = part.get("description") or attributes.get("description") or ""

    # Split name at "/" to get new name (first part)
    new_name = current_name
    if "/" in current_name:
        head = current_name.split("/", 1)[0].strip()
        # A name starting with "/" would otherwise be renamed to an empty
        # string; keep the original name in that case.
        if head:
            new_name = head

    # Split description at "/" to get new description (second part)
    new_description = ""
    needs_provider_update = False

    if "/" in current_desc:
        new_description = current_desc.partition("/")[2].strip()
        if not new_description:
            needs_provider_update = True
    elif not current_desc.strip():
        # No description at all
        needs_provider_update = True
    else:
        # Has description but no "/" - leave as is
        new_description = current_desc

    name_changed = current_name != new_name
    # Only update description if we have one and it changed
    desc_changed = bool(new_description) and current_desc != new_description

    # Check what needs updating
    changes = []
    if name_changed:
        changes.append(f"name: '{current_name}' → '{new_name}'")
    if desc_changed:
        changes.append(f"desc: '{current_desc}' → '{new_description}'")
    if needs_provider_update:
        changes.append("needs provider update for description")

    if not changes:
        return (True, "Already correct", False)

    if dry_run:
        return (True, f"Would update: {'; '.join(changes)}", needs_provider_update)

    # Apply updates, sending only the fields that actually changed
    payload = {}
    if name_changed:
        payload["name"] = new_name
    if desc_changed:
        payload["description"] = new_description

    if not payload:
        # Only the provider-update flag is set: there is nothing to PATCH,
        # so do not issue a no-op request.
        return (True, f"No field changes: {'; '.join(changes)}", needs_provider_update)

    try:
        r = api._patch_merge(f"/api/parts/{part_id}", payload)
        if r.status_code not in range(200, 300):
            return (False, f"Failed to update: {r.status_code}", needs_provider_update)

        result_msg = f"Updated: {'; '.join(changes)}"
        return (True, result_msg, needs_provider_update)
    except Exception as e:
        return (False, f"Update failed: {e}", needs_provider_update)


def run_standardize_asdmb(category_name: str = "Clock - ASDMB", dry_run: bool = False,
                          update_providers: bool = False, progress_callback=None):
    """
    Main function to standardize all ASDMB crystal parts.

    Args:
        category_name: Name of the category to process (default: "Clock - ASDMB")
        dry_run: If True, don't make any changes
        update_providers: If True, trigger provider updates for parts without descriptions
        progress_callback: Optional callback function(current, total, status_text).
            It is called before each part and once more after the loop; a
            truthy return value from a per-part call cancels processing.
            When a callback is given, the tqdm progress bar is not used.
    """
    print("=" * 70)
    print("ASDMB CRYSTAL STANDARDIZATION")
    print("=" * 70)
    print(f"Category: {category_name}")
    print(f"Mode: {'DRY RUN (no changes)' if dry_run else 'LIVE MODE (will update parts)'}")
    print(f"Provider updates: {'ENABLED' if update_providers else 'DISABLED'}")
    print("=" * 70)

    # Initialize API
    api = PartDB(PARTDB_BASE, PARTDB_TOKEN)

    # Get all parts in category
    print("\nFetching parts from category...")
    parts = get_all_parts_in_category(api, category_name)

    if not parts:
        print("No parts found!")
        return

    total = len(parts)
    print(f"\nProcessing {total} parts...")

    # Track results
    successful = 0
    failed = 0
    skipped = 0
    needs_provider = []

    # Process each part
    use_tqdm = not progress_callback
    iterator = tqdm(parts, desc="Processing parts") if use_tqdm else parts

    for idx, part in enumerate(iterator):
        # Check for cancellation
        if progress_callback:
            cancelled = progress_callback(idx, total, f"Processing part {idx+1}/{total}...")
            if cancelled:
                print("\n⚠ Operation cancelled by user")
                break

        part_name = part.get("name") or "Unknown"
        part_id = api._extract_id(part)

        success, message, needs_update = standardize_asdmb_part(api, part, dry_run)

        if success:
            if "Already correct" in message or "skipping" in message:
                skipped += 1
            else:
                successful += 1
                print(f"✓ {part_name}: {message}")
                if needs_update:
                    needs_provider.append((part_id, part_name))
        else:
            failed += 1
            print(f"✗ {part_name}: {message}")

    # Final progress update (also sent after a cancellation)
    if progress_callback:
        progress_callback(total, total, "Complete!")

    # Summary
    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Total parts: {total}")
    print(f"Updated: {successful}")
    print(f"Failed: {failed}")
    print(f"Skipped: {skipped}")
    print(f"Need provider update: {len(needs_provider)}")

    if needs_provider and update_providers and not dry_run:
        print("\n" + "=" * 70)
        print("TRIGGERING PROVIDER UPDATES")
        print("=" * 70)

        # Import selenium flow for provider updates (kept local so the
        # browser stack is only required when provider updates are requested)
        try:
            from provider.selenium_flow import (
                start_firefox_resilient,
                ensure_logged_in,
                run_provider_update_flow,
            )
            from config import HEADLESS_PROVIDER

            print("Starting browser...")
            driver = start_firefox_resilient(headless_first=HEADLESS_PROVIDER)

            provider_success = 0
            provider_failed = 0

            # The finally clause guarantees the browser is closed on every
            # exit path: failed login, an exception in the flow, or success.
            try:
                print("Logging in...")
                driver.get(PARTDB_BASE + "/")
                if not ensure_logged_in(driver, PARTDB_BASE, interactive_ok=True, wait_s=120):
                    print("Failed to log in!")
                    return

                controller = driver.current_window_handle

                for part_id, part_name in tqdm(needs_provider, desc="Updating from providers"):
                    print(f"\nUpdating {part_name}...")
                    ok, where = run_provider_update_flow(driver, PARTDB_BASE, "/en/", part_id, controller)

                    if ok:
                        provider_success += 1
                        print(" ✓ Success")
                    else:
                        provider_failed += 1
                        print(f" ✗ Failed at: {where}")
            finally:
                driver.quit()

            print("\n" + "=" * 70)
            print("PROVIDER UPDATE SUMMARY")
            print("=" * 70)
            print(f"Successful: {provider_success}")
            print(f"Failed: {provider_failed}")

        except Exception as e:
            print(f"Error during provider updates: {e}")
            import traceback
            traceback.print_exc()

    elif needs_provider and not update_providers:
        print("\nParts needing provider update:")
        for part_id, part_name in needs_provider[:10]:  # Show first 10
            print(f" - {part_name} (ID: {part_id})")
        if len(needs_provider) > 10:
            print(f" ... and {len(needs_provider) - 10} more")
        print("\nRe-run with update_providers=True to trigger provider updates")

    print("\n" + "=" * 70)


if __name__ == "__main__":
    import sys

    dry_run = "--dry-run" in sys.argv or "-d" in sys.argv
    update_providers = "--update-providers" in sys.argv or "-u" in sys.argv

    run_standardize_asdmb(dry_run=dry_run, update_providers=update_providers)