import re from typing import Dict, List, Tuple NEXT_TAG_RE = re.compile(r'(PICK|K14|30P|1P|K10|K1|D|T|Z)') def tokenize(flat: str) -> List[Tuple[str, str]]: tokens: List[Tuple[str, str]] = [] s = flat p_idx = s.find('P') if p_idx == -1: return tokens start_val = p_idx + 1 m = NEXT_TAG_RE.search(s, start_val) if m: tokens.append(('P', s[start_val:m.start()].strip())) else: tokens.append(('P', s[start_val:].strip())) return tokens while m: tag = m.group(1) start_val = m.end() m_next = NEXT_TAG_RE.search(s, start_val) if m_next: tokens.append((tag, s[start_val:m_next.start()].strip())) m = m_next else: tokens.append((tag, s[start_val:].strip())) break return tokens def _first(tokens: List[Tuple[str, str]], tag: str) -> str: for t, v in tokens: if t == tag: return v return "" def parse_digikey(flat: str) -> Dict[str, str]: tokens = tokenize(flat) return { 'DigiKeyPart' : _first(tokens, 'P'), 'MfrPart' : _first(tokens, '1P'), 'CustomerPart' : _first(tokens, '30P'), 'InternalLot1' : _first(tokens, 'K1'), 'InternalLot2' : _first(tokens, 'K10'), 'DateCodeRaw' : _first(tokens, 'D'), 'TraceID' : _first(tokens, 'T'), 'PackageSerial': _first(tokens, 'K14'), 'PickTicket' : _first(tokens, 'PICK'), }