Source code for synapse.lib.chop

import binascii

import regex

import synapse.exc as s_exc
import synapse.common as s_common

import synapse.lib.cache as s_cache

import synapse.lookup.cvss as s_cvss

TagMatchRe = regex.compile(r'([\w*]+\.)*[\w*]+')

'''
Shared primitive routines for chopping up strings and values.
'''

def intstr(text):
    return int(text, 0)

def digits(text):
    return ''.join([c for c in text if c.isdigit()])

def printables(text):
    return ''.join([c for c in text if c.isprintable()])

def hexstr(text):
    '''
    Ensure a string is valid hex.

    Args:
        text (str): String to normalize.

    Examples:
        Norm a few strings:

            hexstr('0xff00')
            hexstr('ff00')

    Notes:
        Will accept strings prefixed by '0x' or '0X' and remove them.

    Returns:
        str: Normalized hex string.
    '''
    text = text.strip().lower()
    if text.startswith(('0x', '0X')):
        text = text[2:]

    text = text.replace(' ', '').replace(':', '')
    if not text:
        raise s_exc.BadTypeValu(valu=text, name='hexstr',
                                mesg='No string left after stripping')

    try:
        # checks for valid hex width and does character
        # checking in C without using regex
        s_common.uhex(text)
    except (binascii.Error, ValueError) as e:
        raise s_exc.BadTypeValu(valu=text, name='hexstr', mesg=str(e)) from None

    return text

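# Editor's usage sketch (not part of synapse.lib.chop): hexstr() lowercases the
# input, strips an optional '0x' prefix along with spaces and colons, and then
# validates what remains via s_common.uhex(). The helper name below is
# hypothetical and only illustrates the expected behavior.
def _example_hexstr():
    assert hexstr('0xFF:00') == 'ff00'
    assert hexstr('ff 00') == 'ff00'
    try:
        hexstr('0x')  # nothing left after stripping the prefix
    except s_exc.BadTypeValu:
        pass
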
def onespace(text):
    return ' '.join(text.split())

@s_cache.memoize(size=10000)
def tag(text):
    return '.'.join(tagpath(text))

@s_cache.memoize(size=10000)
def tagpath(text):
    text = text.lower().strip('#').strip()
    return [onespace(t) for t in text.split('.')]

@s_cache.memoize(size=10000)
def tags(norm):
    '''
    Divide a normalized tag string into hierarchical layers.
    '''
    # this is ugly for speed....
    parts = norm.split('.')
    return ['.'.join(parts[:i]) for i in range(1, len(parts) + 1)]

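# Editor's usage sketch (not part of synapse.lib.chop): tagpath() lowercases,
# strips a leading '#', and collapses whitespace per tag element; tag() joins
# the result and tags() expands a normalized tag into its hierarchical layers.
# The helper name below is hypothetical.
def _example_tags():
    assert tagpath('#Foo.Bar  Baz') == ['foo', 'bar baz']
    assert tag('#Foo.Bar  Baz') == 'foo.bar baz'
    assert tags('foo.bar.baz') == ['foo', 'foo.bar', 'foo.bar.baz']
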
@s_cache.memoize(size=10000)
def stormstring(s):
    '''
    Make a string storm safe by escaping backslashes and double quotes.

    Args:
        s (str): String to make storm safe.

    Notes:
        This does not encapsulate a string in double quotes.

    Returns:
        str: A string which can be embedded directly into a storm query.
    '''
    s = s.replace('\\', '\\\\')
    s = s.replace('"', '\\"')
    return s

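# Editor's usage sketch (not part of synapse.lib.chop): stormstring() escapes
# backslashes and double quotes so the value can be dropped between double
# quotes in a storm query. The helper name below is hypothetical.
def _example_stormstring():
    assert stormstring(r'c:\temp "x"') == r'c:\\temp \"x\"'
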
def validateTagMatch(tag):
    '''
    Raises an exception if tag is not a valid tagmatch (i.e. a tag that might have globs).
    '''
    if TagMatchRe.fullmatch(tag) is None:
        raise s_exc.BadTag(mesg='Invalid tag match')

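# Editor's usage sketch (not part of synapse.lib.chop): validateTagMatch()
# allows glob characters within tag elements but rejects anything TagMatchRe
# does not fully match. The helper name below is hypothetical.
def _example_validatetagmatch():
    validateTagMatch('foo.*.baz')      # globs are fine
    try:
        validateTagMatch('foo bar')    # spaces are not
    except s_exc.BadTag:
        pass
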
unicode_dashes = (
    '\u2011',  # non-breaking hyphen
    '\u2012',  # figure dash
    '\u2013',  # endash
    '\u2014',  # emdash
)

unicode_dashes_replace = tuple([(char, '-') for char in unicode_dashes])

def replaceUnicodeDashes(valu):
    '''
    Replace unicode dashes in a string with regular dashes.

    Args:
        valu (str): A string.

    Returns:
        str: A new string with replaced dashes.
    '''
    for dash in unicode_dashes:
        valu = valu.replace(dash, '-')
    return valu

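# Editor's usage sketch (not part of synapse.lib.chop): replaceUnicodeDashes()
# maps the unicode dash variants listed above onto a plain ASCII hyphen. The
# helper name below is hypothetical.
def _example_replacedashes():
    assert replaceUnicodeDashes('2011\u20132014') == '2011-2014'
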
def cvss2_normalize(vect):
    '''
    Helper function to normalize CVSS2 vectors.
    '''
    vdict = cvss_validate(vect, s_cvss.cvss2)
    return cvss_normalize(vdict, s_cvss.cvss2)

def cvss3x_normalize(vect):
    '''
    Helper function to normalize CVSS3.X vectors.
    '''
    vdict = cvss_validate(vect, s_cvss.cvss3_1)
    return cvss_normalize(vdict, s_cvss.cvss3_1)

def cvss_normalize(vdict, vers):
    '''
    Normalize CVSS vectors.
    '''
    metrics = s_cvss.metrics[vers]
    undefined = s_cvss.undefined[vers]

    vals = []
    for key in metrics:
        valu = vdict.get(key, undefined)
        if valu != undefined:
            vals.append(f'{key}:{valu}')

    return '/'.join(vals)

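# Editor's usage sketch (not part of synapse.lib.chop): the *_normalize()
# helpers validate a vector and re-emit only the defined metrics. The exact
# metric ordering and the undefined sentinel come from synapse.lookup.cvss, so
# this sketch only checks the metric:value pairs, not their order. The helper
# name below is hypothetical.
def _example_cvss_normalize():
    norm = cvss2_normalize('(AV:N/AC:L/Au:N/C:P/I:P/A:P)')
    assert set(norm.split('/')) == {'AV:N', 'AC:L', 'Au:N', 'C:P', 'I:P', 'A:P'}

    norm = cvss3x_normalize('AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H')
    assert 'AV:N' in norm.split('/')
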
def cvss_validate(vect, vers):
    '''
    Validate (as best as possible) the CVSS vector string. Look for issues such as:
        - No duplicated metrics
        - Invalid metrics
        - Invalid metric values
        - Missing mandatory metrics

    Returns a dictionary with the parsed metric:value pairs.
    '''
    missing = []
    badvals = []
    invalid = []

    tag = s_cvss.tags[vers]
    METRICS = s_cvss.metrics[vers]

    # Do some canonicalization of the vector for easier parsing
    _vect = vect
    _vect = _vect.strip('(')
    _vect = _vect.strip(')')

    if _vect.startswith(tag):
        _vect = _vect[len(tag):]

    if vers == s_cvss.cvss3_0 and _vect.startswith(_tag := s_cvss.tags[s_cvss.cvss3_1]):
        _vect = _vect[len(_tag):]

    if vers == s_cvss.cvss3_1 and _vect.startswith(_tag := s_cvss.tags[s_cvss.cvss3_0]):
        _vect = _vect[len(_tag):]

    try:
        # Parse out metrics
        mets_vals = [k.split(':') for k in _vect.split('/')]

        # Convert metrics into a dictionary
        vdict = dict(mets_vals)

    except ValueError:
        raise s_exc.BadDataValu(mesg=f'Provided vector {vect} malformed')

    # Check that each metric is only specified once
    if len(mets_vals) != len(set(k[0] for k in mets_vals)):
        seen = []
        repeated = []

        for met, val in mets_vals:
            if met in seen:
                repeated.append(met)
            seen.append(met)

        repeated = ', '.join(repeated)
        raise s_exc.BadDataValu(mesg=f'Provided vector {vect} contains duplicate metrics: {repeated}')

    invalid = []
    for metric in vdict:
        # Check that provided metrics are valid
        if metric not in METRICS:
            invalid.append(metric)

    if invalid:
        invalid = ', '.join(invalid)
        raise s_exc.BadDataValu(mesg=f'Provided vector {vect} contains invalid metrics: {invalid}')

    missing = []
    badvals = []
    for metric, (valids, mandatory, _) in METRICS.items():
        # Check for mandatory metrics
        if mandatory and metric not in vdict:
            missing.append(metric)

        # Check if metric value is valid
        val = vdict.get(metric, None)
        if metric in vdict and val not in valids:
            badvals.append(f'{metric}:{val}')

    if missing:
        missing = ', '.join(missing)
        raise s_exc.BadDataValu(mesg=f'Provided vector {vect} missing mandatory metric(s): {missing}')

    if badvals:
        badvals = ', '.join(badvals)
        raise s_exc.BadDataValu(mesg=f'Provided vector {vect} contains invalid metric value(s): {badvals}')

    return vdict

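# Editor's usage sketch (not part of synapse.lib.chop): cvss_validate() returns
# the parsed metric:value mapping and raises s_exc.BadDataValu for duplicate,
# invalid, or missing mandatory metrics. The helper name below is hypothetical.
def _example_cvss_validate():
    vdict = cvss_validate('AV:N/AC:L/Au:N/C:P/I:P/A:P', s_cvss.cvss2)
    assert vdict['AV'] == 'N'

    try:
        cvss_validate('AV:N/AV:L/AC:L/Au:N/C:P/I:P/A:P', s_cvss.cvss2)
    except s_exc.BadDataValu:
        pass  # duplicate AV metric is rejected
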
def uncnorm(valu):
    '''
    Validate and normalize the UNC path passed in `valu` into a URI.

    This function will accept `@SSL` and `@<port>` as part of the host name to
    indicate SSL (https) or a specific port number. It can also accept IPv6
    addresses in the host name even though those are non-standard according to
    the spec.
    '''
    proto = 'smb'
    port = 0
    paths = ()
    filename = ''

    if not valu.startswith('\\\\'):
        raise s_exc.BadTypeValu(mesg=f'Invalid UNC path: Does not start with \\\\.')

    parts = valu.split('\\')
    # e.g.: \\\\server\\share\\path\\file -> ['', '', 'server', 'share', 'path', 'file']

    # host name and share name are mandatory
    if len(parts) < 4:
        raise s_exc.BadTypeValu(mesg=f'Invalid UNC path: Host name and share name are required.')

    host = parts[2]
    share = parts[3]

    # Share name length should be 1-80 characters
    sharelen = len(share)
    if sharelen == 0 or sharelen > 80:
        raise s_exc.BadTypeValu(mesg=f'Invalid UNC path: Share name must be 1-80 characters.')

    if len(parts) > 4:
        parts = parts[4:]

        # Check directory names
        paths = parts[:-1]
        for path in paths:
            if len(path) > 255:
                raise s_exc.BadTypeValu(
                    mesg=f'Invalid UNC path: Path component longer than 255 characters.',
                    valu=path
                )

        # Check filename
        filename = parts[-1]
        if len(filename) > 255:
            fparts = filename.split(':')
            if len(fparts[0]) > 255:
                raise s_exc.BadTypeValu(
                    mesg=f'Invalid UNC path: Filename longer than 255 characters.',
                    valu=fparts[0]
                )

    # Done with validation, now get to the choppa

    if '@SSL' in host:
        proto = 'https'
        host = host.replace('@SSL', '')

    if '@' in host:
        # Could be '@<port>'
        host, port = host.split('@', 1)
        try:
            port = int(port)
        except ValueError:
            raise s_exc.BadTypeValu(
                mesg=f'Invalid UNC path: Invalid port.',
                valu=port
            )

    # Convert ...ipv6-literal.net back to an actual ipv6 address
    if host.lower().endswith('.ipv6-literal.net'):
        host = host.lower().strip('.ipv6-literal.net')
        host = host.replace('-', ':')

        # Strip off the zone index
        if 's' in host:
            host = host[:host.index('s')]

    if host.count(':') >= 2:
        if port:
            # Host is an ipv6 address
            host = f'[{host}]'
        else:
            host = host.strip('[]')

    if port:
        port = f':{port}'
    else:
        port = ''

    if paths:
        paths = '/'.join(paths)
        paths = f'/{paths}'
    else:
        paths = ''

    if filename:
        filename = f'/{filename}'

    return f'{proto}://{host}{port}/{share}{paths}{filename}'

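# Editor's usage sketch (not part of synapse.lib.chop): uncnorm() rewrites a
# UNC path into an smb:// URI, switching to https:// when '@SSL' is present in
# the host and carrying any '@<port>' suffix over to the URI. The helper name
# below is hypothetical.
def _example_uncnorm():
    assert uncnorm(r'\\server\share\dir\file.txt') == 'smb://server/share/dir/file.txt'
    assert uncnorm(r'\\server@SSL@4443\share') == 'https://server:4443/share'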