"""Micro-benchmark comparing four ways to validate strings against an alphabet.

A string is valid when it is non-empty, its first character is an ASCII
letter or digit, and every following character is an ASCII letter, a digit,
or one of ``-_. ():/,@[]``.  The same rule is implemented with a regular
expression, with set membership, with ``ord`` range checks and with
``bytes.translate``; each implementation is timed through the ``timing``
decorator and the collected timings are printed by ``print_timings``.
"""
import functools
import random
import re
import statistics
import string
import time

# Maps a decorated function's name to the list of its run times in
# whole milliseconds, one entry per call.
TIMINGS = {}

# Single source of truth for the alphabet, so every validator below checks
# exactly the same rule as the regular expression in ``with_regex``.
_FIRST_CHARS = string.ascii_letters + string.digits
_REST_CHARS = _FIRST_CHARS + "-_. ():/,@[]"


def timing(f):
    """Decorator that records the wall-clock duration of every call to *f*.

    The duration, rounded to whole milliseconds, is appended to
    ``TIMINGS[f.__name__]``.  It is recorded even when *f* raises, and the
    return value (or exception) of *f* is passed through unchanged.
    """

    @functools.wraps(f)
    def wrap(*args, **kw):
        start = time.perf_counter()
        try:
            return f(*args, **kw)
        finally:
            elapsed_ms = round((time.perf_counter() - start) * 1000)
            TIMINGS.setdefault(f.__name__, []).append(elapsed_ms)

    return wrap


def print_timings() -> None:
    """Print mean ± sample standard deviation (in ms) for each timed function."""
    for name, samples in TIMINGS.items():
        mean = statistics.mean(samples)
        # statistics.stdev needs at least two samples; report no spread for
        # a function that has only been called once instead of raising.
        spread = statistics.stdev(samples) if len(samples) > 1 else 0.0
        print(f"{name} took {round(mean, 3)}±{round(spread, 3)} ms")


def random_word(length: int) -> str:
    """Return a random string of *length* ASCII letters (both cases)."""
    return "".join(random.choices(string.ascii_letters, k=length))


@timing
def generate(length: int, word_length: int = 80) -> list[str]:
    """Return *length* random words, each *word_length* characters long."""
    return [random_word(word_length) for _ in range(length)]


@timing
def with_regex(data: list[str]) -> int:
    """Validate every item with a compiled regular expression.

    Returns the number of valid items in *data*.
    """
    pattern = re.compile(r"^[a-z0-9A-Z][a-z0-9A-Z\-_. ():/,@\[\]]*$")
    # Bind the method once: calling re.fullmatch(pattern, item) would go
    # through the module-level dispatcher on every iteration.
    matches = pattern.fullmatch
    valid = 0
    for item in data:
        if matches(item):
            valid += 1
    return valid


@timing
def with_sets(data: list[str]) -> int:
    """Validate every item with per-character set membership tests.

    Returns the number of valid items in *data*.
    """
    first_ok = frozenset(_FIRST_CHARS)
    rest_ok = frozenset(_REST_CHARS)
    valid = 0
    for item in data:
        # An empty string has no first character and is never valid.
        if not item or item[0] not in first_ok:
            continue
        for ch in item[1:]:
            if ch not in rest_ok:
                break
        else:
            # Loop finished without hitting a forbidden character.
            valid += 1
    return valid


@timing
def with_ord(data: list[str]) -> int:
    """Validate every item by comparing code points against numeric ranges.

    Returns the number of valid items in *data*.
    """
    valid = 0
    for item in data:
        if not item:
            continue
        first = ord(item[0])
        if not (
            48 <= first <= 57  # 0-9
            or 65 <= first <= 90  # A-Z
            or 97 <= first <= 122  # a-z
        ):
            continue
        for ch in item[1:]:
            code = ord(ch)
            if not (
                97 <= code <= 122  # a-z
                or 64 <= code <= 91  # @ A-Z [   (92, the backslash, is excluded)
                or 44 <= code <= 58  # , - . / 0-9 :
                or code == 32  # space
                or code == 40  # (
                or code == 41  # )
                or code == 93  # ]
                or code == 95  # _
            ):
                break
        else:
            valid += 1
    return valid


@timing
def with_bytes(data: list[str]) -> int:
    """Validate every item by deleting allowed bytes with ``bytes.translate``.

    An item is valid when nothing is left after all allowed bytes have been
    removed.  Returns the number of valid items in *data*.
    """
    first_ok = _FIRST_CHARS.encode("ascii")
    rest_ok = _REST_CHARS.encode("ascii")
    valid = 0
    for item in data:
        # errors="replace" turns every non-ASCII character into b"?", which
        # is outside the allowed alphabet, so such items are rejected.
        # (errors="ignore" would drop them and wrongly accept the item.)
        raw = item.encode("ascii", "replace")
        if not raw or raw[:1].translate(None, first_ok):
            continue
        if raw.translate(None, rest_ok):
            continue
        valid += 1
    return valid


def main() -> None:
    """Run the benchmark ten times on fresh random data and print timings."""
    for _ in range(10):
        data = generate(10_000_000)
        counts = {
            "with_regex": with_regex(data),
            "with_sets": with_sets(data),
            "with_ord": with_ord(data),
            "with_bytes": with_bytes(data),
        }
        if len(set(counts.values())) > 1:
            # The timings are only comparable if all variants do the same job.
            print(f"WARNING: validators disagree: {counts}")
        # Drop the large list before the next round allocates a new one.
        del data
    print_timings()


if __name__ == "__main__":
    main()