"""
Utilities for CFG/MemCFG objects and their datasets
"""
import numpy as np
import bincfg
from .type_utils import *
from .misc_utils import get_smallest_np_dtype
[docs]
def get_address(obj: 'AddressLike') -> 'int':
"""Gets the integer address from the given object
Args:
obj (Union[str, int, Addressable]): a string, int, or object with a string/int `.address` attribute (should
always be positive)
Raises:
TypeError: `obj` is an unknown type
ValueError: given address is negative
Returns:
int: the integer address
"""
if isinstance(obj, str):
ret = int(obj, 0)
elif isinstance(obj, (int, np.integer)):
ret = obj
elif hasattr(obj, 'address'):
ret = get_address(obj.address)
else:
raise TypeError("Cannot get address value from object of type: '%s'" % type(obj).__name__)
if ret < 0:
raise ValueError("Cannot have an address that is negative: %d" % obj)
return ret
[docs]
def get_special_function_names():
"""Returns the current global special function names"""
return SPECIAL_FUNCTION_NAMES
[docs]
def check_for_normalizer(dataset, cfg_data):
"""Checks the incoming data for a normalizer to set to be `dataset`'s normalizer
Assumes this dataset does not yet have a normalizer. Searches the incoming `cfg_data` for a cfg/dataset that
has a normalizer, and sets it to be this dataset's normalizer. If this method finds no normalizer, or multiple
unique normalizers, then an error will be raised.
Args:
dataset (Union[CFGDataset, MemCFGDataset]): a ``CFGDataset`` or ``MemCFGDataset`` without a normalizer
cfg_data (Iterable[Union[str, CFG, MemCFG, CFGDataset, MemCFGDataset]]): an iterable of
str/``CFG``/``MemCFG``/``CFGDataset``/``MemCFGDataset``'s
Raises:
ValueError: when there are multiple conflicting normalizers, or if no normalizer could be found
"""
# If this is an individual object whose normalizer is the same for itself or all sub-objects, then we only need to check itself
if isinstance(cfg_data, (bincfg.MemCFG, bincfg.CFG, bincfg.MemCFGDataset)) or \
(isinstance(cfg_data, bincfg.CFGDataset) and not cfg_data.allow_multiple_norms):
cfg_data = [cfg_data]
for cfgd in cfg_data:
if isinstance(cfgd, str):
continue
if cfgd.normalizer is not None:
if dataset.normalizer is None:
dataset.normalizer = cfgd.normalizer
elif dataset.normalizer != cfgd.normalizer:
raise ValueError("Multiple normalizers detected.")
if dataset.normalizer is None:
raise ValueError("Could not find a normalizer in cfg_data.")
[docs]
def update_atomic_tokens(file_tokens, curr_data, update_tokens):
"""Updates atomic tokens. Only meant to be passed to AtomicData.atomic_update as the function to use"""
curr_data.update(file_tokens)
for t in update_tokens:
curr_data.setdefault(t, len(curr_data))
return curr_data
[docs]
def update_memcfg_tokens(cfg_data, tokens):
"""Adds all new tokens to `tokens`, and updates all tokens in `cfg_data` to their respective values in `tokens`
Tokens in `cfg_data` will be modified, as will the `.asm_lines` attribute of each memcfg. Assumes the `cfg_data`
has conflicting tokens to `tokens` and thus needs modification. Both `cfg_data` and `tokens` will be
modified in-place.
Args:
cfg_data (Union[MemCFG, MemCFGDataset]): the memcfg/memcfgdataset to have its tokens changed
tokens (Union[Dict[str, int], AtomicData]): the dictionary of tokens to update with the new tokens in `cfg_data`.
Can be an AtomicData object for atomic updating of tokens
"""
# If the tokens are the same, we can just return
if tokens is cfg_data.tokens:
return
# Add all the new tokens in cfg_data to tokens
if isinstance(tokens, bincfg.AtomicTokenDict):
tokens.update(cfg_data.tokens)
else:
for new_token in cfg_data.tokens:
tokens.setdefault(new_token, len(tokens))
# Create the mapping from old token value to new token value
old_to_new = {v: tokens[t] for t, v in cfg_data.tokens.items()}
# Update all of the `.asm_lines` to their new token values
update_cfgs = [cfg_data] if isinstance(cfg_data, bincfg.MemCFG) else cfg_data.cfgs
for cfg in update_cfgs:
new_asm_lines = [old_to_new[l] for l in cfg.asm_lines]
cfg.asm_lines = np.array(new_asm_lines, dtype=get_smallest_np_dtype(max(new_asm_lines)))
# The default global set of special function names
SPECIAL_FUNCTION_NAMES = {'free', 'printf', 'fputc', 'fprintf', '__gmon_start__', 'memcpy', 'exit', 'fwrite', 'abort',
'strcpy', 'memmove', '__errno_location', 'ferror', 'fread', 'fclose', 'sprintf', '__libc_start_main', 'ftell', 'fseek',
'fopen', 'libintl_gettext', 'getenv', 'libintl_textdomain', 'strrchr', 'fputs', 'libintl_bindtextdomain', 'strerror',
'_IO_putc', 'setlocale', 'vfprintf', 'xmalloc', '__xstat', 'xexit', 'unlink', 'getopt_long', 'close', 'fflush', 'strcmp',
'malloc', 'bfd_scan_vma', 'xrealloc', 'bfd_get_error', 'bfd_errmsg', 'strlen', 'bfd_set_format', 'strtol', 'access',
'xstrdup', 'mkstemps', 'bfd_malloc', 'bfd_target_list', 'xmalloc_set_program_name', 'bfd_openw', '__assert_fail',
'bfd_close_all_done', 'bfd_set_default_target', 'mkstemp', 'memset', 'bfd_arch_list', 'bfd_openr', 'ctime', 'strchr',
'bfd_printable_arch_mach', 'realloc', 'mkdtemp', 'bfd_close', 'bfd_init', 'bfd_check_format_matches', 'bfd_check_format',
'strncmp', '_IO_getc', 'puts', 'strncpy', 'putchar', 'bfd_set_error_program_name', 'bfd_get_section_by_name',
'bfd_map_over_sections', 'strtoul', 'qsort', 'bfd_get_section_contents', '__strdup', 'filename_cmp', 'open', 'write',
'strstr', 'bfd_openr_next_archived_file', 'bfd_set_section_contents', 'bfd_get_arch', 'bfd_set_section_size', 'strcat',
'sbrk', 'memcmp', 'bfd_get_mach', 'bfd_canonicalize_reloc', 'bfd_get_reloc_upper_bound', 'read', 'concat', 'xcalloc',
'feof', 'stpcpy', 'bfd_set_error', 'lbasename', 'bfd_set_symtab', 'bfd_make_section_with_flags', 'strcasecmp',
'unlink_if_ordinary', 'bfd_bread', 'bfd_seek', 'snprintf', 'calloc', 'cplus_demangle_name_to_style',
'cplus_demangle_set_style', 'fileno', 'perror', 'bfd_reloc_type_lookup', 'xstrerror', '_exit', 'fcntl', 'fdopen',
'__lxstat', 'chmod', 'bsearch', 'fnmatch', 'time', 'bfd_demangle', 'bfd_set_section_flags', 'cplus_demangle',
'remove', 'dup2', 'pipe', 'kill', 'wait4', 'waitpid', 'vfork', 'execvp', 'sleep', 'execv', 'bfd_hash_traverse',
'bfd_hash_newfunc', 'bfd_hash_table_free', 'bfd_hash_allocate', 'bfd_hash_lookup', 'bfd_scan_arch', 'bfd_hash_table_init',
'bfd_set_start_address', 'atoi', 'rewind', 'htab_find_slot', 'htab_find', 'abort', 'abs', 'acos', 'asctime', 'asctime_r',
'asin', 'assert', 'atan', 'atan2', 'atexit', 'atof', 'atoi', 'atol', 'bsearch', 'btowc', 'calloc', 'catclose', 'catgets',
'catopen', 'ceil', 'clearerr', 'clock', 'cos', 'cosh', 'ctime', 'ctime64', 'ctime_r', 'ctime64_r', 'difftime',
'difftime64', 'div', 'erf', 'erfc', 'exit', 'exp', 'fabs', 'fclose', 'fdopen', 'feof', 'ferror', 'fflush', 'fgetc',
'fgetpos', 'fgets', 'fgetwc', 'fgetws', 'fileno', 'floor', 'fmod', 'fopen', 'fprintf', 'fputc', 'fputs', 'fputwc',
'fputws', 'fread', 'free', 'freopen', 'frexp', 'fscanf', 'fseek', 'fsetpos', 'ftell', 'fwide', 'fwprintf', 'fwrite',
'fwscanf', 'gamma', 'getc', 'getchar', 'getenv', 'gets', 'getwc', 'getwchar', 'gmtime', 'gmtime64', 'gmtime_r',
'gmtime64_r', 'hypot', 'isalnum', 'isalpha', 'isascii', 'isblank', 'iscntrl', 'isdigit', 'isgraph', 'islower',
'isprint', 'ispunct', 'isspace', 'isupper', 'iswalnum', 'iswalpha', 'iswblank', 'iswcntrl', 'iswctype', 'iswdigit',
'iswgraph', 'iswlower', 'iswprint', 'iswpunct', 'iswspace', 'iswupper', 'iswxdigit', 'isxdigit', 'j0', 'j1', 'jn',
'labs', 'ldexp', 'ldiv', 'localeconv', 'localtime', 'localtime64', 'localtime_r', 'localtime64_r', 'log', 'log10',
'longjmp', 'malloc', 'mblen', 'mbrlen', 'mbrtowc', 'mbsinit', 'mbsrtowcs', 'mbstowcs', 'mbtowc', 'memchr', 'memcmp',
'memcpy', 'memmove', 'memset', 'mktime', 'mktime64', 'modf', 'nextafter', 'nextafterl', 'nexttoward', 'nexttowardl',
'nl_langinfo4', 'perror', 'pow', 'printf', 'putc', 'putchar', 'putenv', 'puts', 'putwc', 'putwchar', 'qsort',
'quantexpd32', 'quantexpd64', 'quantexpd128', 'quantized32', 'quantized64', 'quantized128', 'samequantumd32',
'samequantumd64', 'samequantumd128', 'raise', 'rand', 'rand_r', 'realloc', 'regcomp', 'regerror', 'regexec',
'regfree', 'remove', 'rename', 'rewind', 'scanf', 'setbuf', 'setjmp', 'setlocale', 'setvbuf', 'signal', 'sin',
'sinh', 'snprintf', 'sprintf', 'sqrt', 'srand', 'sscanf', 'strcasecmp', 'strcat', 'strchr', 'strcmp', 'strcoll',
'strcpy', 'strcspn', 'strerror', 'strfmon', 'strftime', 'strlen', 'strncasecmp', 'strncat', 'strncmp', 'strncpy',
'strpbrk', 'strptime', 'strrchr', 'strspn', 'strstr', 'strtod', 'strtod32', 'strtod64', 'strtod128', 'strtof',
'strtok', 'strtok_r', 'strtol', 'strtold', 'strtoul', 'strxfrm', 'swprintf', 'swscanf', 'system', 'tan', 'tanh',
'time', 'time64', 'tmpfile', 'tmpnam', 'toascii', 'tolower', 'toupper', 'towctrans', 'towlower', 'towupper',
'ungetc', 'ungetwc', 'va_arg', 'va_copy', 'va_end', 'va_start', 'vfprintf', 'vfscanf', 'vfwprintf', 'vfwscanf',
'vprintf', 'vscanf', 'vsprintf', 'vsnprintf', 'vsscanf', 'vswprintf', 'vswscanf', 'vwprintf', 'vwscanf', 'wcrtomb',
'wcscat', 'wcschr', 'wcscmp', 'wcscoll', 'wcscpy', 'wcscspn', 'wcsftime', 'wcslen', 'wcslocaleconv', 'wcsncat',
'wcsncmp', 'wcsncpy', 'wcspbrk', 'wcsptime', 'wcsrchr', 'wcsrtombs', 'wcsspn', 'wcsstr', 'wcstod', 'wcstod32',
'wcstod64', 'wcstod128', 'wcstof', 'wcstok', 'wcstol', 'wcstold', 'wcstombs', 'wcstoul', 'wcsxfrm', 'wctob', 'wctomb',
'wctrans', 'wctype', 'wcwidth', 'wmemchr', 'wmemcmp', 'wmemcpy', 'wmemmove', 'wmemset', 'wprintf', 'wscanf', 'y0', 'y1',
'yn'}