Added uploads, part renaming, bulk data import acceptance

This commit is contained in:
2025-12-17 13:57:47 +11:00
parent aaa1f7520a
commit ae9e1d6e7e
14 changed files with 3325 additions and 11 deletions

View File

@@ -0,0 +1,163 @@
"""
Workflow to accept bulk import jobs automatically.
This script automates the process of accepting import jobs by:
1. Navigating to the bulk import management page
2. Clicking the "View Results" button for the first import job
3. Finding and clicking "Update Part" buttons (only those without 'disabled' class)
4. Clicking Save twice and Complete for each job (same page, no tabs)
5. Repeating until all jobs are processed
"""
import time
from selenium.webdriver.common.by import By
from config import PARTDB_BASE, UI_LANG_PATH, HEADLESS_CONTROLLER
from provider.selenium_flow import (
start_firefox_resilient,
ensure_logged_in,
accept_bulk_import_jobs
)
def run_accept_import_jobs(job_url: str = None, auto_close: bool = False):
    """
    Main function to accept bulk import jobs.

    Drives a Selenium-controlled Firefox session: logs in, opens the bulk
    import management page, clicks "View Results" on the first active job,
    then hands off to accept_bulk_import_jobs() to process every job.

    Args:
        job_url: Optional URL to the specific import job page.
            If None, uses the default bulk import management page.
        auto_close: If True, automatically closes the browser after completion.
            If False, waits for user input before closing.
    """
    print("=== Starting Bulk Import Job Acceptance ===\n")
    # Default to the bulk import management page
    if job_url is None:
        job_url = PARTDB_BASE + "/en/tools/bulk_info_provider_import/manage"
    # Start browser
    driver = start_firefox_resilient(headless_first=HEADLESS_CONTROLLER)
    try:
        # Navigate to base URL and login (interactive login allowed, up to 10 min)
        driver.get(PARTDB_BASE + "/")
        if not ensure_logged_in(driver, PARTDB_BASE, interactive_ok=True, wait_s=600):
            print("Could not login; aborting.")
            return
        print("Login successful!\n")
        # Navigate to the bulk import management page
        print(f"Navigating to: {job_url}")
        driver.get(job_url)
        time.sleep(2.0)  # give the job table time to render
        # Find and click the first "View Results" button for an ACTIVE job
        print("Looking for 'View Results' button on active job...")
        try:
            view_results_button = None
            # Find all table rows
            rows = driver.find_elements(By.XPATH, "//tbody/tr")
            for row in rows:
                try:
                    # Check if this row has an "Active" badge or "In Progress" status
                    badges = row.find_elements(By.XPATH, ".//span[contains(@class, 'badge')]")
                    is_active = False
                    for badge in badges:
                        badge_text = badge.text.strip().lower()
                        if 'active' in badge_text or 'in progress' in badge_text:
                            is_active = True
                            break
                    # If this row is active, find its "View Results" button
                    if is_active:
                        view_btn = row.find_elements(By.XPATH, ".//a[contains(@class, 'btn') and contains(., 'View Results')]")
                        if view_btn:
                            view_results_button = view_btn[0]
                            print(f"Found active job with 'View Results' button")
                            break
                except Exception as e:
                    # Row may be stale or lack badges — just move to the next row.
                    continue
            # Fallback: if no active job found, just get the first View Results button
            if not view_results_button:
                print("No active job found, looking for any 'View Results' button...")
                xpaths = [
                    "//a[contains(@class, 'btn') and contains(., 'View Results')]",
                    "//a[contains(@href, '/bulk_info_provider_import/step2/') and contains(@class, 'btn-primary')]",
                ]
                for xpath in xpaths:
                    elements = driver.find_elements(By.XPATH, xpath)
                    if elements:
                        view_results_button = elements[0]
                        break
            if view_results_button:
                print(f"Clicking 'View Results' button...")
                # Scroll into view first so the click is not intercepted by overlays.
                driver.execute_script("arguments[0].scrollIntoView({block:'center'});", view_results_button)
                time.sleep(0.5)
                view_results_button.click()
                time.sleep(2.0)
                print("✓ Navigated to results page")
            else:
                print("Could not find 'View Results' button. Make sure there's an import job to process.")
                if not auto_close:
                    print("Press Enter to close...")
                    input()
                return
        except Exception as e:
            print(f"Error clicking 'View Results': {e}")
            if not auto_close:
                print("Press Enter to close...")
                input()
            return
        # Run the automation (we are already on the results page, so job_url=None)
        print("\nStarting automation...")
        print("=" * 70)
        successful, failed, skipped = accept_bulk_import_jobs(
            driver,
            PARTDB_BASE,
            UI_LANG_PATH,
            job_url=None,  # Already on the page
            max_iterations=100
        )
        print("\n" + "=" * 60)
        print("AUTOMATION COMPLETE")
        print(f"Successfully processed: {successful} jobs")
        print(f"Failed: {failed} jobs")
        print(f"Skipped (no results): {skipped} jobs")
        print("=" * 60)
        # Keep browser open for inspection if not auto_close
        if not auto_close:
            print("\nBrowser will remain open for inspection.")
            print("Press Enter to close...")
            input()
    except Exception as e:
        print(f"\nError during automation: {e}")
        import traceback
        traceback.print_exc()
        if not auto_close:
            print("\nPress Enter to close...")
            input()
    finally:
        # Always release the browser process, even after errors.
        try:
            driver.quit()
        except Exception:
            pass
if __name__ == "__main__":
    # Script entry point: run with the default bulk import management page.
    # You can optionally provide a direct URL to the import job page
    # Example: run_accept_import_jobs("https://partdb.neutronservices.duckdns.org/en/import/jobs/123")
    run_accept_import_jobs()

View File

@@ -0,0 +1,420 @@
"""
Import parts from Digi-Key CSV exports and update from providers.
This workflow:
1. Scans the "import CSVs" folder for CSV files
2. Reads Digi-Key part numbers and other info from each CSV
3. Creates parts in PartDB if they don't exist
4. Triggers provider updates to fetch full information
5. Sets EDA values based on part type
"""
import os
import csv
import re
from pathlib import Path
from typing import List, Dict, Tuple
from tqdm import tqdm
from config import PARTDB_BASE, PARTDB_TOKEN, HEADLESS_PROVIDER
from apis.partdb_api import PartDB
from provider.selenium_flow import (
start_firefox_resilient, ensure_logged_in, run_provider_update_flow
)
from parsers.values import (
parse_resistance_to_ohms, format_ohms_for_eda,
parse_capacitance_to_farads, format_farads_for_eda
)
def find_csv_files(folder_path: str = "import CSVs") -> List[Path]:
    """
    Return every ``*.csv`` file in *folder_path*.

    If the folder does not exist yet it is created (so the user has a place
    to drop exports) and an empty list is returned.
    """
    target = Path(folder_path)
    if not target.exists():
        print(f"Creating folder: {target}")
        target.mkdir(parents=True, exist_ok=True)
        return []
    return list(target.glob("*.csv"))
def parse_digikey_csv(csv_path: Path) -> List[Dict[str, str]]:
"""
Parse a Digi-Key CSV export file.
Expected columns (case-insensitive):
- Digi-Key Part Number
- Manufacturer Part Number
- Manufacturer
- Description
- Quantity Available
- Unit Price
Returns list of part dictionaries.
"""
parts = []
with open(csv_path, 'r', encoding='utf-8-sig') as f:
reader = csv.DictReader(f)
# Normalize column names (remove BOM, strip whitespace, lowercase)
if reader.fieldnames:
reader.fieldnames = [name.strip().lower() for name in reader.fieldnames]
for row in reader:
# Skip empty rows
if not any(row.values()):
continue
# Extract relevant fields (try multiple column name variations)
dkpn = (row.get('dk part #') or
row.get('digi-key part number') or
row.get('digikey part number') or
row.get('part number') or '').strip()
# Handle multiple DK part numbers separated by commas
if dkpn and ',' in dkpn:
dkpn = dkpn.split(',')[0].strip()
mpn = (row.get('mfr part #') or
row.get('manufacturer part number') or
row.get('mfr part number') or
row.get('mpn') or '').strip()
manufacturer = (row.get('mfr') or
row.get('manufacturer') or '').strip()
description = (row.get('description') or
row.get('product description') or '').strip()
# Skip if no MPN
if not mpn:
continue
parts.append({
'dkpn': dkpn,
'mpn': mpn,
'manufacturer': manufacturer,
'description': description
})
return parts
def create_part_if_not_exists(api: PartDB, part_info: Dict[str, str]) -> Tuple[bool, int, str]:
    """
    Ensure a part exists in PartDB, creating a minimal record when missing.

    Returns:
        (created, part_id, message) — ``created`` is True only when a new part
        was made, ``part_id`` is the existing or new ID (0 on failure), and
        ``message`` describes the outcome.
    """
    mpn = part_info['mpn']
    dkpn = part_info.get('dkpn')
    maker_name = part_info.get('manufacturer', 'Unknown')

    # Short-circuit when the part is already present.
    found_id = api.find_part_exact(dkpn=dkpn, mpn=mpn)
    if found_id:
        return (False, found_id, "Already exists")

    # Create a minimal part; a later provider update fills in the details.
    try:
        maker_id = api.ensure_manufacturer(maker_name)
        # Use a default category - provider update will suggest better one
        # You can change this to a "To Be Categorized" category ID
        default_category_id = 1  # Change this to your default category
        product_url = f"https://www.digikey.com/product-detail/en/-/{dkpn}" if dkpn else None
        new_id = api.create_part(
            category_id=default_category_id,
            manufacturer_id=maker_id,
            name=mpn,  # Will be updated by provider
            mpn=mpn,
            description=part_info.get('description', ''),
            footprint_id=None,
            product_url=product_url,
        )
    except Exception as exc:
        return (False, 0, f"Failed: {str(exc)}")
    return (True, new_id, "Created")
def set_eda_value_for_part(api: PartDB, part_id: int, mpn: str):
    """
    Set the EDA value for a part based on its detected type.

    Passives (resistors, capacitors, inductors) get their component value;
    ICs and anything unrecognized get the base of the part number, or the
    full MPN when no sensible base can be extracted.

    Args:
        api: PartDB API instance
        part_id: ID of the part to update
        mpn: Manufacturer part number

    Raises:
        Exception: wrapping any error from the API or the value parsers
            (the original exception is chained as the cause).
    """
    try:
        part = api.get_part(part_id)
        name = part.get('name', mpn).upper()
        description = part.get('description', '').upper()
        eda_value = None

        def looks_like(*indicators: str) -> bool:
            # True when any indicator appears in the name or description.
            return any(tok in name or tok in description for tok in indicators)

        # Check for resistors
        if looks_like('OHM', 'Ω', 'RESISTOR', 'RES '):
            try:
                ohms = parse_resistance_to_ohms(name)
                if ohms is not None:
                    eda_value = format_ohms_for_eda(ohms)
                    print(f" Setting resistor EDA value: {eda_value}")
            except Exception:
                # Parser failure: fall through to the generic MPN path below.
                pass
        # Check for capacitors
        elif looks_like('FARAD', 'F ', 'CAP', 'CAPACITOR'):
            try:
                farads = parse_capacitance_to_farads(name)
                if farads is not None:
                    eda_value = format_farads_for_eda(farads)
                    print(f" Setting capacitor EDA value: {eda_value}")
            except Exception:
                pass
        # Check for inductors — look for patterns like "10uH", "100nH", etc.
        elif looks_like('INDUCTOR', 'HENRY', 'H ', 'IND '):
            match = re.search(r'(\d+\.?\d*)\s*(M|K|µ|U|N|P)?H', name, re.IGNORECASE)
            if match:
                value = float(match.group(1))
                # The unit group is a single prefix character (or absent); the
                # original code also compared against impossible two-letter
                # values like 'MH'/'UH' — those branches were dead code.
                unit = match.group(2).upper() if match.group(2) else ''
                scale = {'M': 1e-3, 'µ': 1e-6, 'U': 1e-6, 'N': 1e-9, 'P': 1e-12}
                value *= scale.get(unit, 1.0)
                # Format for EDA with the largest unit that keeps value >= 1.
                if value >= 1:
                    eda_value = f"{value:.2f}H"
                elif value >= 1e-3:
                    eda_value = f"{value * 1e3:.2f}mH"
                elif value >= 1e-6:
                    eda_value = f"{value * 1e6:.2f}µH"
                else:
                    eda_value = f"{value * 1e9:.2f}nH"
                print(f" Setting inductor EDA value: {eda_value}")
        # For ICs and other components, use base name
        if eda_value is None:
            # Extract base name (remove package suffix), e.g.
            # "TPS54302DDCR" -> "TPS54302". NOTE(review): the pattern only
            # matches upper-case MPNs — lower-case ones fall back to the full
            # MPN; confirm that is intended.
            match = re.match(r'^([A-Z0-9]+?)([A-Z]{2,4}[A-Z]?)?$', mpn)
            if match:
                base_name = match.group(1)
                # Only use the base when it is long enough to be meaningful.
                if len(base_name) >= 5:
                    eda_value = base_name
                    print(f" Setting IC/component EDA value: {eda_value}")
                else:
                    eda_value = mpn
                    print(f" Setting EDA value to full MPN: {eda_value}")
            else:
                eda_value = mpn
                print(f" Setting EDA value to full MPN: {eda_value}")
        # Persist the computed EDA value.
        if eda_value:
            api.patch_eda_value(part_id, eda_value)
    except Exception as e:
        # Chain the original exception for easier debugging.
        raise Exception(f"Error setting EDA value: {e}") from e
def run_import_from_csv(folder_path: str = "import CSVs", update_providers: bool = True, progress_callback=None):
    """
    Main function to import parts from CSV files.

    Args:
        folder_path: Path to folder containing CSV files
        update_providers: If True, trigger provider updates after creating parts
        progress_callback: Optional callable(current, total, status) -> bool;
            returning True cancels the remaining work.
    """
    print("=" * 70)
    print("IMPORT PARTS FROM CSV FILES")
    print("=" * 70)
    print(f"Folder: {folder_path}")
    print(f"Provider updates: {'ENABLED' if update_providers else 'DISABLED'}")
    print("=" * 70)
    print()

    # Find CSV files
    csv_files = find_csv_files(folder_path)
    if not csv_files:
        print(f"No CSV files found in '{folder_path}'")
        print("Place Digi-Key CSV exports in this folder and try again.")
        return
    print(f"Found {len(csv_files)} CSV file(s):")
    for csv_file in csv_files:
        print(f" - {csv_file.name}")
    print()

    # Initialize API
    api = PartDB(PARTDB_BASE, PARTDB_TOKEN)

    # Pre-compute the grand total for progress reporting. The original code
    # re-parsed EVERY CSV file once per part (O(n^2) file reads); parse each
    # file once up front instead and reuse the result below.
    pre_parsed = {}
    grand_total = 0
    if progress_callback:
        for f in csv_files:
            try:
                pre_parsed[f] = parse_digikey_csv(f)
                grand_total += len(pre_parsed[f])
            except Exception:
                # Parsing errors are reported in the main loop below.
                pass

    # Process each CSV file
    all_created_parts = []
    total_processed = 0
    total_created = 0
    total_skipped = 0
    total_failed = 0
    for csv_file in csv_files:
        print(f"\nProcessing: {csv_file.name}")
        print("-" * 70)
        # Parse CSV (reuse the pre-parsed result when available)
        try:
            parts = pre_parsed[csv_file] if csv_file in pre_parsed else parse_digikey_csv(csv_file)
            print(f"Found {len(parts)} parts in CSV")
        except Exception as e:
            print(f"Error parsing CSV: {e}")
            continue
        if not parts:
            print("No valid parts found in CSV")
            continue
        # Create parts
        use_tqdm = not progress_callback
        iterator = tqdm(parts, desc="Creating parts") if use_tqdm else parts
        for idx, part_info in enumerate(iterator):
            # Check for cancellation
            if progress_callback:
                cancelled = progress_callback(
                    total_processed + idx,
                    grand_total,
                    f"Processing {csv_file.name}: {part_info['mpn']}"
                )
                if cancelled:
                    print("\n⚠ Operation cancelled by user")
                    return
            created, part_id, message = create_part_if_not_exists(api, part_info)
            if created:
                total_created += 1
                all_created_parts.append((part_id, part_info['mpn']))
                if not use_tqdm:
                    print(f"✓ Created: {part_info['mpn']} (ID: {part_id})")
            elif part_id > 0:
                total_skipped += 1
            else:
                total_failed += 1
                print(f"✗ Failed: {part_info['mpn']} - {message}")
            total_processed += 1

    # Summary
    print("\n" + "=" * 70)
    print("IMPORT SUMMARY")
    print("=" * 70)
    print(f"Total parts processed: {total_processed}")
    print(f"Created: {total_created}")
    print(f"Skipped (exist): {total_skipped}")
    print(f"Failed: {total_failed}")
    print("=" * 70)

    # Provider updates for freshly created parts
    if all_created_parts and update_providers:
        print("\n" + "=" * 70)
        print("TRIGGERING PROVIDER UPDATES")
        print("=" * 70)
        print(f"Updating {len(all_created_parts)} newly created parts from providers...")
        try:
            print("Starting browser...")
            driver = start_firefox_resilient(headless_first=HEADLESS_PROVIDER)
            try:
                print("Logging in...")
                driver.get(PARTDB_BASE + "/")
                if not ensure_logged_in(driver, PARTDB_BASE, interactive_ok=True, wait_s=120):
                    print("Failed to log in!")
                    return
                controller = driver.current_window_handle
                provider_success = 0
                provider_failed = 0
                use_tqdm = not progress_callback
                iterator = tqdm(all_created_parts, desc="Updating from providers") if use_tqdm else all_created_parts
                for idx, (part_id, mpn) in enumerate(iterator):
                    # Check for cancellation
                    if progress_callback:
                        cancelled = progress_callback(
                            idx,
                            len(all_created_parts),
                            f"Updating from providers: {mpn}"
                        )
                        if cancelled:
                            print("\n⚠ Provider updates cancelled by user")
                            break
                    try:
                        # NOTE(review): the ASDMB workflow calls this helper with
                        # an extra language-path argument and unpacks a tuple —
                        # confirm the real signature of run_provider_update_flow.
                        success = run_provider_update_flow(driver, PARTDB_BASE, part_id, controller)
                        if success:
                            provider_success += 1
                            # Set EDA value after provider update
                            try:
                                set_eda_value_for_part(api, part_id, mpn)
                            except Exception as e:
                                print(f" Warning: Could not set EDA value for {mpn}: {e}")
                        else:
                            provider_failed += 1
                            print(f"✗ Provider update failed for: {mpn}")
                    except Exception as e:
                        provider_failed += 1
                        print(f"✗ Error updating {mpn}: {e}")
            finally:
                # The original only quit the driver on the happy path; always
                # release the browser, even when an exception escapes the loop.
                driver.quit()
            print("\n" + "=" * 70)
            print("PROVIDER UPDATE SUMMARY")
            print("=" * 70)
            print(f"Successful: {provider_success}")
            print(f"Failed: {provider_failed}")
            print("=" * 70)
        except Exception as e:
            print(f"Error during provider updates: {e}")
    print("\nDone!")
if __name__ == "__main__":
    # CLI entry point; pass --no-provider to skip browser-based provider updates.
    import sys
    # Check for --no-provider flag
    update_providers = "--no-provider" not in sys.argv
    run_import_from_csv(update_providers=update_providers)

View File

@@ -0,0 +1,456 @@
"""
Workflow to standardize ASDMB crystal parts.
This script goes through all parts in the "Clock - ASDMB" category and:
1. Splits the name at "/" - first part becomes name, second part becomes description
2. For parts without a description after splitting, triggers info provider update
Uses the PartDB API for all operations.
"""
import re
from typing import Optional, List, Tuple
from tqdm import tqdm
from config import PARTDB_BASE, PARTDB_TOKEN
from apis.partdb_api import PartDB
def get_all_parts_in_category(api: PartDB, category_name: str) -> List[dict]:
    """
    Get all parts in a specific category and its subcategories.

    Resolves the category by (case-insensitive) name, walks the category tree
    to collect every descendant ID, then pages through /api/parts keeping the
    parts whose category is in that set.

    Args:
        api: PartDB API instance.
        category_name: Human-readable category name to match.

    Returns:
        List of raw part dicts belonging to the category tree (empty when the
        category is not found).
    """
    categories = api.list_categories()
    target_cat_id = None
    for cat in categories:
        if (cat.get("name") or "").strip().lower() == category_name.lower():
            target_cat_id = api._extract_id(cat)
            break
    if not target_cat_id:
        print(f"Category '{category_name}' not found!")
        return []
    print(f"Found category '{category_name}' with ID {target_cat_id}")

    def _numeric_parent_id(cat: dict):
        # Normalize a category's "parent" field (dict, IRI string, or id) to int.
        parent = cat.get("parent")
        if not parent:
            return None
        if isinstance(parent, dict):
            raw = parent.get("id") or parent.get("_id")
        elif isinstance(parent, str):
            raw = parent
        else:
            return None
        if not raw:
            return None
        try:
            if isinstance(raw, str):
                # Extract just the digits ("/api/categories/7" -> 7).
                return int(''.join(c for c in raw if c.isdigit()))
            return int(raw)
        except (ValueError, TypeError):
            # Guard against digit-free IDs — int('') raised unhandled before.
            return None

    # Collect the target category plus all of its descendants.
    subcategory_ids = [target_cat_id]

    def find_children(parent_id: int):
        for cat in categories:
            if _numeric_parent_id(cat) == parent_id:
                child_id = api._extract_id(cat)
                if child_id and child_id not in subcategory_ids:
                    subcategory_ids.append(child_id)
                    print(f" Found subcategory: {cat.get('name')} (ID: {child_id})")
                    find_children(child_id)

    find_children(target_cat_id)
    print(f"Total categories to process: {len(subcategory_ids)}")
    print(f"Category IDs: {subcategory_ids}")

    def _part_category_id(part: dict):
        # Resolve a part's category ID from any of the API response shapes.
        # This logic was previously duplicated verbatim for the list and dict
        # response branches.
        part_cat = part.get("category")
        if isinstance(part_cat, dict):
            cid = api._extract_id(part_cat)
            if cid:
                return cid
        elif isinstance(part_cat, str):
            try:
                if "/categories/" in part_cat:
                    return int(part_cat.strip("/").split("/")[-1])
                return int(''.join(c for c in part_cat if c.isdigit()))
            except Exception:
                pass
        elif isinstance(part_cat, int):
            return part_cat
        # Fall back to JSON:API style relationships.
        relationships = part.get("relationships") or {}
        rel_cat = relationships.get("category")
        if isinstance(rel_cat, dict):
            rel_data = rel_cat.get("data", {})
            if isinstance(rel_data, dict):
                cid = api._extract_id(rel_data)
                if cid:
                    return cid
        # Finally, check the attributes block.
        attributes = part.get("attributes") or {}
        attr_cat = attributes.get("category")
        if isinstance(attr_cat, dict):
            return api._extract_id(attr_cat)
        if isinstance(attr_cat, (int, str)):
            try:
                return int(str(attr_cat).strip("/").split("/")[-1])
            except Exception:
                return None
        return None

    # Fetch all parts with pagination, filtering by category as we go.
    all_parts = []
    page = 1
    per_page = 30  # Use smaller page size to match API default
    print("\nFetching parts from API...")
    while True:
        print(f" Fetching page {page}...")
        try:
            response = api._get("/api/parts", params={"per_page": per_page, "page": page})
        except Exception as e:
            print(f" Error fetching page {page}: {e}")
            break
        # The API may answer with a bare list or a {data, meta} envelope.
        if isinstance(response, list):
            data, meta, empty_msg = response, {}, " No parts returned, stopping"
        elif isinstance(response, dict):
            data = response.get("data", [])
            meta = response.get("meta", {})
            empty_msg = " No data returned, stopping"
        else:
            break
        if not data:
            print(empty_msg)
            break
        matches_this_page = 0
        for part in data:
            cid = _part_category_id(part)
            if cid and cid in subcategory_ids:
                all_parts.append(part)
                matches_this_page += 1
        print(f" Got {len(data)} parts ({matches_this_page} matches, total: {len(all_parts)})")
        # Prefer the meta page counters when present; otherwise infer from size.
        if meta.get("current_page") and meta.get("last_page"):
            has_more = meta["current_page"] < meta["last_page"]
        else:
            has_more = len(data) >= per_page
        if not has_more:
            break
        page += 1
        # Safety check (previously applied only to the dict-shaped branch).
        if page > 100:
            print(f" Warning: Fetched 100 pages, stopping")
            break
    print(f"\nFound {len(all_parts)} parts in category")
    return all_parts
def standardize_asdmb_part(api: PartDB, part: dict, dry_run: bool = False) -> Tuple[bool, str, bool]:
    """
    Standardize a single ASDMB crystal part.

    The name keeps only the text before the first "/"; the description keeps
    only the text after the first "/" of the existing description.
    NOTE(review): the module docstring says the new description should come
    from the second half of the *name* — confirm which field is intended.

    Returns:
        (success, message, needs_provider_update)
    """
    part_id = api._extract_id(part)
    if not part_id:
        return (False, "No part ID", False)
    # Get current name and description (flat or attributes-nested shape).
    current_name = part.get("name") or part.get("attributes", {}).get("name") or ""
    current_desc = part.get("description") or part.get("attributes", {}).get("description") or ""
    # Split name at "/" to get new name (first part)
    new_name = current_name
    if "/" in current_name:
        new_name = current_name.split("/", 1)[0].strip()
    # Split description at "/" to get new description (second part)
    new_description = ""
    needs_provider_update = False
    if "/" in current_desc:
        halves = current_desc.split("/", 1)
        new_description = halves[1].strip() if len(halves) > 1 else ""
        if not new_description:
            needs_provider_update = True
    elif not current_desc.strip():
        # No description at all
        needs_provider_update = True
    else:
        # Has description but no "/" - leave as is
        new_description = current_desc
    # Check what needs updating. Bug fix: the original messages had no
    # separator between the old and new values ("'old''new'"); use the same
    # "'old' -> 'new'" format as the other standardization workflows.
    changes = []
    if current_name != new_name:
        changes.append(f"name: '{current_name}' -> '{new_name}'")
    if new_description and current_desc != new_description:
        changes.append(f"desc: '{current_desc}' -> '{new_description}'")
    if needs_provider_update:
        changes.append("needs provider update for description")
    if not changes:
        return (True, "Already correct", False)
    if dry_run:
        return (True, f"Would update: {'; '.join(changes)}", needs_provider_update)
    # Apply updates
    try:
        payload = {
            "name": new_name
        }
        # Only update description if we have one and it changed
        if new_description and new_description != current_desc:
            payload["description"] = new_description
        r = api._patch_merge(f"/api/parts/{part_id}", payload)
        if not (200 <= r.status_code < 300):
            return (False, f"Failed to update: {r.status_code}", needs_provider_update)
        return (True, f"Updated: {'; '.join(changes)}", needs_provider_update)
    except Exception as e:
        return (False, f"Update failed: {e}", needs_provider_update)
def run_standardize_asdmb(category_name: str = "Clock - ASDMB", dry_run: bool = False, update_providers: bool = False, progress_callback=None):
    """
    Main function to standardize all ASDMB crystal parts.

    Args:
        category_name: Name of the category to process (default: "Clock - ASDMB")
        dry_run: If True, don't make any changes
        update_providers: If True, trigger provider updates for parts without descriptions
        progress_callback: Optional callback function(current, total, status_text);
            returning a truthy value cancels the remaining work.
    """
    print("=" * 70)
    print("ASDMB CRYSTAL STANDARDIZATION")
    print("=" * 70)
    print(f"Category: {category_name}")
    print(f"Mode: {'DRY RUN (no changes)' if dry_run else 'LIVE MODE (will update parts)'}")
    print(f"Provider updates: {'ENABLED' if update_providers else 'DISABLED'}")
    print("=" * 70)
    # Initialize API
    api = PartDB(PARTDB_BASE, PARTDB_TOKEN)
    # Get all parts in category
    print("\nFetching parts from category...")
    parts = get_all_parts_in_category(api, category_name)
    if not parts:
        print("No parts found!")
        return
    print(f"\nProcessing {len(parts)} parts...")
    # Track results
    successful = 0
    failed = 0
    skipped = 0
    needs_provider = []  # (part_id, part_name) pairs still missing a description
    # Process each part (tqdm only when no GUI-style callback is attached)
    use_tqdm = not progress_callback
    iterator = tqdm(parts, desc="Processing parts") if use_tqdm else parts
    for idx, part in enumerate(iterator):
        # Check for cancellation
        if progress_callback:
            cancelled = progress_callback(idx, len(parts), f"Processing part {idx+1}/{len(parts)}...")
            if cancelled:
                print("\n⚠ Operation cancelled by user")
                break
        part_name = part.get("name") or "Unknown"
        part_id = api._extract_id(part)
        success, message, needs_update = standardize_asdmb_part(api, part, dry_run)
        if success:
            if "Already correct" in message or "skipping" in message:
                skipped += 1
            else:
                successful += 1
                print(f"{part_name}: {message}")
            if needs_update:
                needs_provider.append((part_id, part_name))
        else:
            failed += 1
            print(f"{part_name}: {message}")
    # Final progress update
    if progress_callback:
        progress_callback(len(parts), len(parts), "Complete!")
    # Summary
    print("\n" + "=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Total parts: {len(parts)}")
    print(f"Updated: {successful}")
    print(f"Failed: {failed}")
    print(f"Skipped: {skipped}")
    print(f"Need provider update: {len(needs_provider)}")
    if needs_provider and update_providers and not dry_run:
        print("\n" + "=" * 70)
        print("TRIGGERING PROVIDER UPDATES")
        print("=" * 70)
        # Import selenium flow for provider updates
        try:
            from provider.selenium_flow import start_firefox_resilient, ensure_logged_in, run_provider_update_flow
            from config import HEADLESS_PROVIDER
            print("Starting browser...")
            driver = start_firefox_resilient(headless_first=HEADLESS_PROVIDER)
            print("Logging in...")
            driver.get(PARTDB_BASE + "/")
            if not ensure_logged_in(driver, PARTDB_BASE, interactive_ok=True, wait_s=120):
                print("Failed to log in!")
                driver.quit()
                return
            controller = driver.current_window_handle
            provider_success = 0
            provider_failed = 0
            for part_id, part_name in tqdm(needs_provider, desc="Updating from providers"):
                print(f"\nUpdating {part_name}...")
                # NOTE(review): this call passes a language-path argument
                # ("/en/") and unpacks a 2-tuple, while the CSV import
                # workflow calls run_provider_update_flow(driver, base,
                # part_id, controller) and treats the result as a bool —
                # confirm the helper's actual signature.
                ok, where = run_provider_update_flow(driver, PARTDB_BASE, "/en/", part_id, controller)
                if ok:
                    provider_success += 1
                    print(f" ✓ Success")
                else:
                    provider_failed += 1
                    print(f" ✗ Failed at: {where}")
            driver.quit()
            print("\n" + "=" * 70)
            print("PROVIDER UPDATE SUMMARY")
            print("=" * 70)
            print(f"Successful: {provider_success}")
            print(f"Failed: {provider_failed}")
        except Exception as e:
            print(f"Error during provider updates: {e}")
            import traceback
            traceback.print_exc()
    elif needs_provider and not update_providers:
        print("\nParts needing provider update:")
        for part_id, part_name in needs_provider[:10]:  # Show first 10
            print(f" - {part_name} (ID: {part_id})")
        if len(needs_provider) > 10:
            print(f" ... and {len(needs_provider) - 10} more")
        print("\nRe-run with update_providers=True to trigger provider updates")
    print("\n" + "=" * 70)
if __name__ == "__main__":
    # CLI entry point: --dry-run/-d previews changes, --update-providers/-u
    # additionally runs browser-based provider updates.
    import sys
    dry_run = "--dry-run" in sys.argv or "-d" in sys.argv
    update_providers = "--update-providers" in sys.argv or "-u" in sys.argv
    run_standardize_asdmb(dry_run=dry_run, update_providers=update_providers)

View File

@@ -0,0 +1,446 @@
"""
Workflow to standardize components in PartDB.
For parts in the Passives category:
- Sets the name to "Value Package" format
- Sets the description to "MPN Value Package Tolerance Voltage/Current/Power" format
- Sets the EDA value to match the component value
- Fixes names/descriptions with "/" separators
For parts in other categories:
- Checks if EDA value is set, if not sets it to the part name
- Fixes names/descriptions with "/" separators
"""
import re
from typing import Optional, List, Tuple
from tqdm import tqdm
from config import PARTDB_BASE, PARTDB_TOKEN
from apis.partdb_api import PartDB
from workflows.standardize_passives import standardize_part as standardize_passive_part
def fix_slash_separated_fields(name: str, description: str) -> Tuple[str, str]:
    """
    Fix names and descriptions that have two parts separated by '/'.

    A field is rewritten only when it splits into exactly two non-blank
    halves, in which case the first half (stripped) is kept.
    For example: "ABC123/XYZ789" -> "ABC123".

    Returns:
        Tuple of (fixed_name, fixed_description)
    """

    def keep_first_half(text: str) -> str:
        halves = text.split('/')
        if len(halves) == 2 and halves[0].strip() and halves[1].strip():
            return halves[0].strip()
        return text

    return keep_first_half(name), keep_first_half(description)
def standardize_non_passive_part(api: PartDB, part: dict, dry_run: bool = False) -> Tuple[bool, str]:
    """
    Standardize a non-passive component.

    Repairs slash-separated names/descriptions and backfills a missing EDA
    value with the part name.

    Returns:
        (success: bool, message: str)
    """
    part_id = api._extract_id(part)
    old_name = part.get("name", "")
    old_desc = part.get("description", "")
    old_eda = part.get("value", "")

    # Repair slash-separated fields and decide the EDA value.
    new_name, new_desc = fix_slash_separated_fields(old_name, old_desc)
    new_eda = old_eda if (old_eda and old_eda.strip()) else old_name

    # Build a human-readable list of pending changes.
    delta = []
    if old_name != new_name:
        delta.append(f"name: '{old_name}' -> '{new_name}'")
    if old_desc != new_desc:
        delta.append(f"desc: '{old_desc}' -> '{new_desc}'")
    if old_eda != new_eda:
        delta.append(f"EDA: '{old_eda}' -> '{new_eda}'")
    if not delta:
        return (True, "Already correct")
    if dry_run:
        return (True, f"Would update: {'; '.join(delta)}")

    # Apply the updates via the API.
    try:
        payload = {}
        if old_name != new_name:
            payload["name"] = new_name
        if old_desc != new_desc:
            payload["description"] = new_desc
        if payload:
            r = api._patch_merge(f"/api/parts/{part_id}", payload)
            if not (200 <= r.status_code < 300):
                return (False, f"Failed to update name/desc: {r.status_code}")
        if old_eda != new_eda:
            if not api.patch_eda_value(part_id, new_eda):
                return (False, "Failed to update EDA value")
        return (True, f"Updated: {'; '.join(delta)}")
    except Exception as exc:
        return (False, f"Update failed: {exc}")
def get_all_parts_paginated(api: PartDB) -> List[dict]:
    """
    Get all parts from PartDB, walking the paginated /api/parts endpoint
    until an empty page, an unexpected payload, or an error is seen.
    """
    collected: List[dict] = []
    page_no = 1
    page_size = 30
    print("Fetching all parts from API...")
    while True:
        print(f" Fetching page {page_no}...")
        try:
            batch = api._get("/api/parts", params={"per_page": page_size, "page": page_no})
        except Exception as exc:
            print(f" Error fetching parts: {exc}")
            break
        if not isinstance(batch, list):
            # Anything but a plain list means this endpoint shape is unsupported.
            print(f" Unexpected response type: {type(batch)}")
            break
        if not batch:
            break
        collected.extend(batch)
        print(f" Got {len(batch)} parts (total: {len(collected)})")
        page_no += 1
    print(f"Total parts fetched: {len(collected)}")
    return collected
def is_part_in_passives_category(api: "PartDB", part: dict, passives_category_ids: List[int]) -> bool:
    """
    Check if a part belongs to the Passives category or any of its subcategories.

    Args:
        api: PartDB API client (currently unused; kept for call-site
            compatibility).
        part: Raw part dict; its "category" entry may be an embedded dict
            with an "id"/"_id" key, or an IRI/ID string.
        passives_category_ids: Numeric IDs of Passives plus all of its
            subcategories (see get_passives_category_ids).

    Returns:
        True if the part's category ID parses to one of the given IDs,
        False for missing/unparseable categories.
    """
    category = part.get("category")
    if not category:
        return False
    # The API may embed the category as an object or reference it as a string.
    if isinstance(category, dict):
        cat_id_str = category.get("id") or category.get("_id")
    elif isinstance(category, str):
        cat_id_str = category
    else:
        return False
    try:
        if isinstance(cat_id_str, str):
            # IDs can arrive as IRIs like "/api/categories/42"; keep digits only.
            cat_id = int(''.join(c for c in cat_id_str if c.isdigit()))
        else:
            cat_id = int(cat_id_str)
    except (TypeError, ValueError):
        # None, digit-free strings, or non-numeric values: not a match.
        # (Was a bare `except:` which also hid KeyboardInterrupt etc.)
        return False
    return cat_id in passives_category_ids
def get_passives_category_ids(api: "PartDB") -> List[int]:
    """
    Get all category IDs for Passives and its subcategories.

    Walks the full category list, locates the category named "Passives"
    (case-insensitive), then depth-first collects every descendant's ID.

    Args:
        api: PartDB API client; uses ``list_categories()`` and
            ``_extract_id()``.

    Returns:
        IDs of Passives plus all of its subcategories (root first), or []
        when no Passives category exists.
    """
    categories = api.list_categories()

    # Locate the root "Passives" category.
    target_cat_id = None
    for cat in categories:
        if (cat.get("name") or "").strip().lower() == "passives":
            target_cat_id = api._extract_id(cat)
            break
    if not target_cat_id:
        print("Warning: Passives category not found!")
        return []

    category_ids: List[int] = [target_cat_id]

    def _parent_num(parent):
        # A parent reference may be an embedded dict or an IRI/ID string;
        # return its numeric ID, or None when absent/unparseable.
        if isinstance(parent, dict):
            ref = parent.get("id") or parent.get("_id")
        elif isinstance(parent, str):
            ref = parent
        else:
            return None
        try:
            if isinstance(ref, str):
                # IRIs like "/api/categories/42": keep the digits only.
                return int(''.join(c for c in ref if c.isdigit()))
            return int(ref)
        except (TypeError, ValueError):
            # Was a bare `except: pass`; narrowed to the parse failures only.
            return None

    def find_children(parent_id: int):
        # Depth-first collection; the membership check below also guards
        # against cycles in the category graph.
        for cat in categories:
            if _parent_num(cat.get("parent")) == parent_id:
                child_id = api._extract_id(cat)
                if child_id and child_id not in category_ids:
                    category_ids.append(child_id)
                    find_children(child_id)

    find_children(target_cat_id)
    print(f"Found Passives category with {len(category_ids)} total categories (including subcategories)")
    return category_ids
def _process_part_group(api, parts, standardize_fn, label, bar_desc,
                        dry_run, progress_callback, processed, total_parts):
    """
    Standardize one group of parts (shared loop for passives / non-passives).

    For each part: report progress (and honour cancellation), call
    *standardize_fn*, and tally the outcome exactly as the two previously
    duplicated loops in run_standardize_components did.

    Args:
        api: PartDB API client (used to extract part IDs).
        parts: Parts to process.
        standardize_fn: Callable(api, part, dry_run=...) -> (success, message).
        label: Group name used in the progress-callback status text.
        bar_desc: tqdm progress-bar description (console mode only).
        dry_run: Forwarded to standardize_fn.
        progress_callback: Optional callback(current, total, status) -> bool;
            a truthy return cancels the whole operation.
        processed: Running count of parts handled so far across groups.
        total_parts: Overall part count, for progress reporting.

    Returns:
        (updated, already_correct, failed, errors, processed, cancelled)
        where errors is a list of (mpn, message) tuples.
    """
    updated = 0
    already_correct = 0
    failed = 0
    errors = []
    # tqdm bar only in console mode; a GUI progress callback replaces it.
    bar = tqdm(parts, desc=bar_desc, unit="part") if not progress_callback else None
    for part in parts:
        # Check for cancellation
        if progress_callback:
            cancelled = progress_callback(
                processed, total_parts,
                f"Processing {label} ({processed+1}/{total_parts})..."
            )
            if cancelled:
                print("\n⚠ Operation cancelled by user")
                if bar:
                    bar.close()
                return updated, already_correct, failed, errors, processed, True
        part_id = api._extract_id(part)
        mpn = part.get("manufacturer_product_number") or part.get("mpn") or f"ID:{part_id}"
        if bar:
            bar.set_postfix_str(mpn[:30])
        success, message = standardize_fn(api, part, dry_run=dry_run)
        if success:
            if "Already correct" in message:
                already_correct += 1
            else:
                updated += 1
            line = f"✓ {mpn}: {message}"
        else:
            failed += 1
            errors.append((mpn, message))
            line = f"✗ {mpn}: {message}"
        if bar:
            tqdm.write(line)
        else:
            print(line)
        processed += 1
    if bar:
        bar.close()
    return updated, already_correct, failed, errors, processed, False


def run_standardize_components(dry_run: bool = False, progress_callback=None):
    """
    Main function to standardize all components.

    For passives: Full standardization (value, format, etc.)
    For others: Fix slashes, ensure EDA value is set

    Args:
        dry_run: If True, only show what would be changed without making changes
        progress_callback: Optional callback function(current, total, status_text);
            returning True from it cancels the operation.
    """
    print("=" * 70)
    print("COMPONENT STANDARDIZATION")
    print("=" * 70)
    print(f"Mode: {'DRY RUN (no changes)' if dry_run else 'LIVE (making changes)'}")
    print("=" * 70)
    print()

    # Initialize API
    api = PartDB(PARTDB_BASE, PARTDB_TOKEN)

    # Get passives category IDs
    passives_ids = get_passives_category_ids(api)

    # Get all parts
    print("\nFetching all parts...")
    all_parts = get_all_parts_paginated(api)
    if not all_parts:
        print("No parts found!")
        return

    # Split inventory: passives get full standardization, the rest a lighter pass.
    passives = []
    non_passives = []
    for part in all_parts:
        if is_part_in_passives_category(api, part, passives_ids):
            passives.append(part)
        else:
            non_passives.append(part)
    print(f"\nFound {len(passives)} passive components")
    print(f"Found {len(non_passives)} non-passive components")
    print(f"Total: {len(all_parts)} parts")
    print()

    # Statistics (per-group tallies come back from _process_part_group)
    passive_success = passive_already_correct = passive_failed = 0
    non_passive_success = non_passive_already_correct = non_passive_failed = 0
    errors = []

    total_parts = len(all_parts)
    processed = 0

    # Process passives
    if passives:
        print("=" * 70)
        print("PROCESSING PASSIVE COMPONENTS")
        print("=" * 70)
        (passive_success, passive_already_correct, passive_failed,
         group_errors, processed, cancelled) = _process_part_group(
            api, passives, standardize_passive_part, "passives", "Passives",
            dry_run, progress_callback, processed, total_parts)
        errors.extend(group_errors)
        if cancelled:
            return

    # Process non-passives
    if non_passives:
        print()
        print("=" * 70)
        print("PROCESSING NON-PASSIVE COMPONENTS")
        print("=" * 70)
        (non_passive_success, non_passive_already_correct, non_passive_failed,
         group_errors, processed, cancelled) = _process_part_group(
            api, non_passives, standardize_non_passive_part, "non-passives", "Others",
            dry_run, progress_callback, processed, total_parts)
        errors.extend(group_errors)
        if cancelled:
            return

    # Final progress update
    if progress_callback:
        progress_callback(total_parts, total_parts, "Complete!")

    # Print summary
    print()
    print("=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"PASSIVE COMPONENTS:")
    print(f"  Total processed: {len(passives)}")
    print(f"  Already correct: {passive_already_correct}")
    print(f"  Successfully updated: {passive_success}")
    print(f"  Failed: {passive_failed}")
    print()
    print(f"NON-PASSIVE COMPONENTS:")
    print(f"  Total processed: {len(non_passives)}")
    print(f"  Already correct: {non_passive_already_correct}")
    print(f"  Successfully updated: {non_passive_success}")
    print(f"  Failed: {non_passive_failed}")
    print()
    print(f"TOTAL:")
    print(f"  Parts processed: {total_parts}")
    print(f"  Successfully updated: {passive_success + non_passive_success}")
    print(f"  Failed: {passive_failed + non_passive_failed}")
    print("=" * 70)

    if errors:
        print()
        print("ERRORS:")
        for mpn, msg in errors[:20]:  # Show first 20 errors
            print(f"  {mpn}: {msg}")
        if len(errors) > 20:
            print(f"  ... and {len(errors) - 20} more")
    print()
if __name__ == "__main__":
    import sys

    # Either flag requests a no-write preview run.
    dry_run = bool({"--dry-run", "-n"} & set(sys.argv))
    if dry_run:
        print("Running in DRY RUN mode - no changes will be made")
        print()
    run_standardize_components(dry_run=dry_run)

File diff suppressed because it is too large Load Diff