"""Compare hashcat benchmark outputs from several devices.

Each input file is parsed for the device name and the per-hash-mode speed,
and the results are printed to stdout as a single Markdown table with one
speed column per input file.
"""

import re
import sys
from collections import defaultdict

# Device line: capture everything between "* Device #<n>: " and the first comma.
_DEVICE_PATTERN = re.compile(r'^\* Device #\d+: (.*?),')

# Section header: "* Hash-Mode <number> (<name>)".
_HASH_MODE_HEADER_PATTERN = re.compile(r'\* Hash-Mode\s+(\d+)\s+\((.*)\)')

# Speed line of the first device. "0*" accepts both the zero-padded form
# ("Speed.#01...:") and the unpadded form ("Speed.#1...:"); it cannot match
# "#10" or "#11" because the digit 1 must be followed by the dot padding.
_SPEED_PATTERN = re.compile(r'Speed\.#0*1\.+:\s+([\d.]+\s+.*H/s)')


def parse_hashcat_benchmark(file_path):
    """Parse a single hashcat benchmark output file.

    Args:
        file_path: Path of the benchmark output, read as UTF-8 text.

    Returns:
        A ``(device_name, hash_modes)`` tuple. ``device_name`` is taken from
        the first ``* Device #N: <name>, ...`` line that does not contain the
        word "skipped". ``hash_modes`` is a list of dicts with the keys
        ``'mode'`` (number, as a string), ``'name'`` and ``'speed'`` (for
        example ``'100.0 MH/s'``), in file order. Hash-mode sections without
        a matching speed line are omitted.

    Raises:
        ValueError: If no usable device line is found.
        OSError: If the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    device_name = None
    for raw_line in content.splitlines():
        line = raw_line.strip()
        match = _DEVICE_PATTERN.match(line)
        # Devices flagged as skipped were not benchmarked; the first
        # remaining device is assumed to be the one that was used.
        if match and "skipped" not in line:
            device_name = match.group(1).strip()
            break

    if not device_name:
        raise ValueError(f"No valid device found in {file_path}")

    hash_modes = []
    headers = list(_HASH_MODE_HEADER_PATTERN.finditer(content))
    for index, header in enumerate(headers):
        # A section runs from the end of its header to the start of the next
        # header, or to the end of the file for the last one.
        section_start = header.end()
        if index + 1 < len(headers):
            section_end = headers[index + 1].start()
        else:
            section_end = len(content)

        speed_match = _SPEED_PATTERN.search(content, section_start, section_end)
        if speed_match:
            hash_modes.append({
                'mode': header.group(1),
                'name': header.group(2),
                'speed': speed_match.group(1),
            })

    return device_name, hash_modes


def main():
    """Parse the files named on the command line and print a Markdown table.

    The table has one row per (hash-mode number, name) pair, sorted by mode
    number, and one speed column per successfully parsed file. Cells for
    modes a device did not report contain "N/A". Unreadable or invalid files
    are reported on stderr and skipped. Exits with status 1 when no file
    arguments are given or when no hash-mode data was found at all.
    """
    if len(sys.argv) < 2:
        print("Usage: python parse_hashcat_multiple.py <file1> [file2] [file3] ...")
        print("Each file is a hashcat benchmark output from a different device.")
        sys.exit(1)

    file_paths = sys.argv[1:]

    column_labels = []   # one label per parsed file, in command-line order
    column_speeds = []   # parallel list of {(mode, name): speed}
    row_keys = {}        # ordered set of every (mode, name) seen
    name_counts = defaultdict(int)

    for file_path in file_paths:
        try:
            device_name, hash_modes = parse_hashcat_benchmark(file_path)
        # Diagnostics go to stderr so that redirected stdout holds only the
        # Markdown document.
        except FileNotFoundError:
            print(f"Warning: File '{file_path}' not found. Skipping.", file=sys.stderr)
            continue
        except ValueError as e:
            print(f"Warning: {e}. Skipping {file_path}.", file=sys.stderr)
            continue
        except Exception as e:
            print(
                f"Warning: Unexpected error parsing {file_path}: {e}. Skipping.",
                file=sys.stderr,
            )
            continue

        # Two files may report the same device name; give each its own,
        # distinguishable column instead of merging them.
        name_counts[device_name] += 1
        occurrence = name_counts[device_name]
        if occurrence == 1:
            label = device_name
        else:
            label = f"{device_name} (#{occurrence})"

        # Speeds are stored per column and looked up by key later, so a mode
        # missing on one device can never shift another device's value into
        # the wrong column.
        speeds = {
            (info['mode'], info['name']): info['speed'] for info in hash_modes
        }
        column_labels.append(label)
        column_speeds.append(speeds)
        row_keys.update(dict.fromkeys(speeds))

    if not row_keys:
        print("Error: No valid data found in any input files.", file=sys.stderr)
        sys.exit(1)

    parts = ["# Hashcat Benchmark Comparison\n\n"]

    # Header row: the two hash-mode columns, then one column per device.
    # Pipes are escaped because they would otherwise split the table cell.
    header_cells = "".join(
        f" {label.replace('|', chr(92) + '|')} |" for label in column_labels
    )
    parts.append("| Hash-Mode (number) | Hash-Mode (name) |" + header_cells + "\n")

    separator_cells = (
        ["--------------------", "------------------"]
        + ["-------"] * len(column_labels)
    )
    parts.append("|" + "|".join(separator_cells) + "|\n")

    for mode_num, mode_name in sorted(row_keys, key=lambda key: int(key[0])):
        key = (mode_num, mode_name)
        cells = [speeds.get(key, "N/A") for speeds in column_speeds]
        escaped_name = mode_name.replace('|', '\\|')
        parts.append(
            f"| {mode_num} | {escaped_name} | " + " | ".join(cells) + " |\n"
        )

    print("".join(parts))


if __name__ == "__main__":
    main()