#! /usr/bin/env python3.4 from math import log from itertools import product, chain, permutations import functools import subprocess, re, sys import os class Op: std = lambda op, operand_strs, width : '({op} {args})'.format(op=str(op), args=' '.join(operand_strs), width=width) ops = dict() @classmethod def add(cls, op_list): for o in op_list: cls.ops[(o.result_type, o.smtname)] = o o.inject() @classmethod def get(cls, recv_class, op): try: return cls.ops[(recv_class, op)] except KeyError: return None def __init__(self, smtname, pyname, eval_str, receiver_type, result_type, fmt=std): self.smtname = smtname self.pyname = pyname self.eval_str = eval_str self.receiver_type = receiver_type self.result_type = result_type self.fmt = fmt self.arity = 0 # get the arity of the operator from the semantic description for m in re.finditer('{([0-9]+)}', self.eval_str): self.arity = max(self.arity, int(m.group(1))) def inject(self): method = lambda *args, name=self.smtname, rt=self.result_type : rt(name, args) setattr(self.receiver_type, self.pyname, method) def make_expr_str(self, operand_strs, width): res = '(' + self.eval_str.format(*operand_strs, width=width) + ')' return res def format(self, operand_strs, width): return self.fmt(self, operand_strs, width) def __str__(self): return self.smtname class Formula: def parse_const(string): if string[0] == '#': return BitvecConst.from_string(string) else: return IntConst(int(string)) def join(op, clauses): l = [ c for c in clauses ] assert len(l) > 0 return l[0] if len(l) == 1 else Formula(op, l) def join_and(clauses): return Formula.join('and', clauses) def join_or(clauses): return Formula.join('or', clauses) def __init__(self, op, operands=[]): assert type(op) == str self.operands = operands self.op = op self.operator = Op.get(self.__class__, op) self.width = max(o.width for o in operands) if len(operands) > 0 else 0 def __ne__(self, other): return (self == other).not_() def __and__(self, other): return Formula.join('and', [self, other]) def __not__(self): return self.not_() def var_subst(self, s): cls = globals()[self.__class__.__name__] return cls(self.op, [ o.var_subst(s) for o in self.operands ]) def varname_subst(self, env): cls = globals()[self.__class__.__name__] return cls(self.op, [ o.varname_subst(env) for o in self.operands ]) def variables(self, visited): for c in self.operands: for v in c.variables(visited): yield v def __str__(self): if self.operator is None: assert len(self.operands) == 0, str(type(self.op)) return self.op else: return self.operator.format([ str(o) for o in self.operands ], self.width) if len(self.operands) > 0 else str(self.op) # return '({0} {1})'.format(self.op, ' '.join(map(str, self.operands))) if len(self.operands) > 0 else str(self.op) def python_expr(self, var_pos_mapping): assert not self.operator is None, 'operator ' + self.op + ' happened in class ' + self.__class__.__name__ return self.operator.make_expr_str([ o.python_expr(var_pos_mapping) for o in self.operands ], self.width) def solve(self): cmds = '\n'.join('(declare-fun {name} () {ty})'.format(name=v.name, ty=v.ty) for v in self.variables(set())) cmds += """ (set-option :produce-models true) (assert {formula}) (check-sat) (get-value ({variables})) """.format(formula=str(self), variables=' '.join(map(str, self.variables(set())))) p = subprocess.Popen([ "z3", "-in", "-smt2"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, universal_newlines=True) #print(cmds) (out, err) = p.communicate(input=cmds) p.wait() if out.startswith('unsat'): return None if p.returncode != 0: print('z3 error') print(out) sys.exit(1) mapping = dict() pat = '\(([a-zA-Z_][0-9a-zA-Z_]*) (#x[0-9a-fA-f]+|\d+|#b[0-1]+)\)' for m in re.finditer(pat, out): var = m.group(1) val = m.group(2) mapping[var] = Formula.parse_const(val) return mapping if True: def make_op(smtname, pyname, eval_str): return Op(smtname=smtname, pyname=pyname, eval_str=eval_str, receiver_type=Formula, result_type=Formula) Op.add([ make_op(smtname='and', pyname='__and__', eval_str='{0} and {1}'), make_op(smtname='xor', pyname='__xor__', eval_str='{0} xor {1}'), make_op(smtname='or', pyname='__or__', eval_str='{0} or {1}'), make_op(smtname='=', pyname='__eq__', eval_str='{0} == {1}'), make_op(smtname='not', pyname='not_', eval_str='not {0}'), make_op(smtname='not', pyname='negate', eval_str='not {0}'), make_op(smtname='ite', pyname='ite', eval_str='{1} if {0} else {2}'), make_op(smtname='=>', pyname='implies', eval_str='(not {0}) or {1}'), ]) TRUE = Formula("true",[]) FALSE = Formula("false",[]) class NewlineFormula(Formula): def __init__(self, operand): super().__init__('', [ operand ]) def __str__(self): return str(self.operands[0]) + '\n' def var_subst(self, s): return self def varname_subst(self, env): return self class BitvecFormula(Formula): def bvashr(x, y, width): assert y >= 0 y = y & (width - 1) s = x >> y highest_bit = x >> (width - 1) mask = ~((highest_bit << (width - y)) - 1) & (1 << width) - 1 return s | mask def bvextract(bv, i, j, width): assert j <= i assert i <= width return (bv >> j) & ((1 << (i - j)) - 1) def bvslt(a, b, width): # found by myself return bool((a < b) - ((b >> (width - 1)) - (a >> (width - 1)))) def bvsle(a, b, width): # found by myself return bvslt(b, a, width) <= 0 def __init__(self, op, operands): super().__init__(op, operands) def masked(self, operation): return '({o} & 0x{mask:x})'.format(o=operation, mask=((1 << self.width) - 1)) def python_expr(self, operand_strs): return self.masked(super().python_expr(operand_strs)) if True: def make_op(smtname, pyname, eval_str, result_type=BitvecFormula, fmt=Op.std): return Op(smtname=smtname, pyname=pyname, eval_str=eval_str, result_type=result_type, receiver_type=BitvecFormula, fmt=fmt) Op.add([ make_op(smtname='bvadd', pyname='__add__', eval_str='{0} + {1}'), make_op(smtname='bvmul', pyname='__mul__', eval_str='{0} * {1}'), make_op(smtname='bvsub', pyname='__sub__', eval_str='{0} - {1}'), make_op(smtname='bvand', pyname='__and__', eval_str='{0} & {1}'), make_op(smtname='bvxor', pyname='__xor__', eval_str='{0} ^ {1}'), make_op(smtname='bvor', pyname='__or__', eval_str='{0} | {1}'), make_op(smtname='bvnot', pyname='__invert__', eval_str='~{0}'), make_op(smtname='bvneg', pyname='__neg__', eval_str='~{0} + 1'), make_op(smtname='bvslt', pyname='slt', eval_str='BitvecFormula.bvslt({0}, {1}, {width:d})', result_type=Formula), make_op(smtname='bvsle', pyname='sle', eval_str='BitvecFormula.bvsle({0}, {1}, {width:d})', result_type=Formula), make_op(smtname='bvsgt', pyname='sgt', eval_str='BitvecFormula.bvslt({1}, {0}, {width:d})', result_type=Formula), make_op(smtname='bvsge', pyname='sge', eval_str='BitvecFormula.bvsle({1}, {0}, {width:d})', result_type=Formula), make_op(smtname='bvult', pyname='__lt__', eval_str='{0} < {1}', result_type=Formula), make_op(smtname='bvule', pyname='__le__', eval_str='{0} <= {1}', result_type=Formula), make_op(smtname='bvugt', pyname='__gt__', eval_str='{0} > {1}', result_type=Formula), make_op(smtname='bvuge', pyname='__ge__', eval_str='{0} >= {1}', result_type=Formula), make_op(smtname='bvshl', pyname='__lshift__', eval_str='{0} << {1}'), make_op(smtname='bvlshr', pyname='__rshift__', eval_str='{0} >> {1}'), make_op(smtname='bvashr', pyname='ashr', eval_str='BitvecFormula.bvashr({0}, {1}, {width:d})'), make_op(smtname='extract', pyname='extract', eval_str='BitvecFormula.bvextract({0}, {1}, {2}, {width:d})', fmt=lambda op, operand_strs, width : '((_ extract {1} {2}) {0})'.format(*operand_strs, width=width)), make_op(smtname='at', pyname='at', eval_str='BitvecFormula.bvextract({0}, {1}, {1}, {width:d})', fmt=lambda op, operand_strs, width : '((_ zero_extend {w}) ((_ extract {1} {1}) {0}))'.format(*operand_strs, w=width - 1)), ]) class BitvecConst(BitvecFormula): def from_string(s): bases = { 'b': 2, 'x': 16 } base = s[1] string = s[2:] assert base in bases value = int(string, bases[base]) width = len(string) * int(log(bases[base], 2)) return BitvecConst(value, width) def __init__(self, value, width): if width % 4 == 0: op = '#x{val:0{nibbles}x}'.format(val=value, nibbles=(width // 4)) else: op = '#b{val:0{width}b}'.format(val=value, width=width) super().__init__(op, []) self.width = width self.value = value def var_subst(self, s): return self def varname_subst(self, env): return self def python_expr(self, var_pos_mapping): return self.masked('0x{:x}'.format(self.value)) def binary_rep(self): return '#b{val:0{width}b}'.format(val=self.value, width=self.width) #"0b"+"".join([]) def __str__(self): return self.binary_rep() class BitvecVar(BitvecFormula): def __init__(self, name, width): super().__init__(name, []) self.name = name self.width = width self.ty = '(_ BitVec {width})'.format(**locals()) def clone(self, name): return BitvecVar(name, self.width) def variables(self, visited): if not self.name in visited: visited.add(self.name) yield self def var_subst(self, env): return env[self.name] if self.name in env else self def varname_subst(self, env): return BitvecVar(env[self.name], self.width) if self.name in env else self def python_expr(self, var_pos_mapping): return self.masked('i[{pos}]'.format(pos=var_pos_mapping[self.name])) class IntFormula(Formula): def __init__(self, op, operands): super().__init__(op, operands) if True: def make_op(smtname, pyname, eval_str, result_type=IntFormula, fmt=Op.std): return Op(smtname=smtname, pyname=pyname, eval_str=eval_str, result_type=result_type, receiver_type=IntFormula, fmt=fmt) # linear arithmetic operators Op.add([ make_op(smtname='<=', pyname='__le__', eval_str='{0} <= {1}', result_type=Formula), make_op(smtname='<', pyname='__lt__', eval_str='{0} < {1}', result_type=Formula), make_op(smtname='add', pyname='__add__', eval_str='{0} + {1}'), make_op(smtname='sub', pyname='__sub__', eval_str='{0} - {1}'), make_op(smtname='mul', pyname='__mul__', eval_str='{0} * {1}'), ]) class IntConst(IntFormula): def __init__(self, value): super().__init__(str(value), []) self.value = value def var_subst(self, s): return self def python_expr(self, operand_strs): return self.value class IntVar(IntFormula): def __init__(self, name): super().__init__(name, []) self.name = name self.ty = 'Int' def variables(self, visited): if not self.name in visited: visited.add(self.name) yield self def var_subst(self, env): return env[self.name] if self.name in env else self def varname_subst(self, env): return IntVar(env[self.name]) if self.name in env else self class PropVar(Formula): def __init__(self, name): super().__init__(name, []) self.name = name self.ty = 'Bool' def variables(self, visited): if not self.name in visited: visited.add(self.name) yield self def var_subst(self, s): return s[self.name] if self.name in s else self def varname_subst(self, env): return IntVar(s[self.name]) if self.name in env else self ######################################################################################################################################################## class AbstractDomain: def __init__(self, variable, bottom, top, join, conc_relation, representation): self.variable = variable self.join = join self.conc_relation = conc_relation self.representation = representation self.bottom = bottom self.top = top def value_assertion(self, name, value): return self.variable(name) == value def conc_variables(self, name): return [self.variable(name)] def extract_value(self, counterexample, name): return counterexample[name] class ReducedProduct(AbstractDomain): def __init__(self, domA, domB): self.domA = domA self.domB = domB self.conc_variables = domA.conc_variables self.extract_value = domA.extract_value self.bottom = [domA.bottom, domB.bottom] self.top = [domA.top, domB.top] def value_assertion(self, name, value): return (self.domA.value_assertion(name+"A", value[0]) & self.domB.value_assertion(name+"B", value[1])) def join(self, valOne, valTwo): return [self.domA.join(valOne[0], valTwo[0]), self.domB.join(valOne[1], valTwo[1])] def conc_relation(self, c, a): return (self.domA.conc_relation(c, a+"A") & self.domB.conc_relation(c, a+"B")) def representation(self, c): return [self.domA.representation(c), self.domB.representation(c)] def myprint(string): if verbose: print(string) def overapproximates(set_to_cover, abs_value, abs_domain): formula = Formula.join_and(NewlineFormula(c) for c in [abs_domain.value_assertion('a', abs_value), set_to_cover, abs_domain.conc_relation('c','a').negate()]) counterexample = formula.solve() if not counterexample: return (True, None) else: return (False, abs_domain.extract_value(counterexample, 'c')) def best_abstract_representation_of_set(set_to_cover, initial_abs, abs_domain): #best abstract representation of set_to_cover greater than or equal to initial_abs abs_value = initial_abs while True: myprint("current abs. candidate: {}".format(abs_value)) (done, counterexample) = overapproximates(set_to_cover, abs_value, abs_domain) if done: return abs_value myprint("not yet covered: {}".format(counterexample)) abs_value = abs_domain.join(abs_value, abs_domain.representation(counterexample)) def best_abstract_output(spec, abs_input_values, abs_domain): abs_inputs_formula = Formula.join_and([NewlineFormula(abs_domain.value_assertion("a{}_pre".format(index), abs_input)) for index, abs_input in enumerate(abs_input_values)]) conc_inputs_formula = Formula.join_and([NewlineFormula(abs_domain.conc_relation("c{}_pre".format(index), "a{}_pre".format(index))) for index in range(0, len(abs_input_values))]) cpost_subst = dict(zip([var.name for var in abs_domain.conc_variables("c_post")], [var.name for var in abs_domain.conc_variables("c")])) set_to_cover = Formula.join_and(NewlineFormula(c) for c in [abs_inputs_formula, conc_inputs_formula, spec.varname_subst(cpost_subst)]) res = best_abstract_representation_of_set(set_to_cover, abs_domain.bottom, abs_domain) print("best abstract output for abstract input(s) "+prettyprint(abs_input_values)+" and concrete transformation " + str(spec) + " is "+ prettyprint(res)) return res #enables backward analysis based on forward (or backward) specification of semantics def best_abstract_input(spec, abs_output, abs_domain): abs_inputs_formula = NewlineFormula(abs_domain.value_assertion("a_post", abs_output)) conc_inputs_formula = NewlineFormula(abs_domain.conc_relation("c_post", "a_post")) cpost_subst = dict(zip([var.name for var in abs_domain.conc_variables("c0_pre")], [var.name for var in abs_domain.conc_variables("c")])) set_to_cover = Formula.join_and(NewlineFormula(c) for c in [abs_inputs_formula, conc_inputs_formula, spec.varname_subst(cpost_subst)]) res = best_abstract_representation_of_set(set_to_cover, abs_domain.bottom, abs_domain) print("best abstract input for abstract output "+prettyprint(abs_output)+" and concrete transformation " + str(spec) + " is "+ prettyprint(res)) return res def prettyprint(o): if type(o) == BitvecConst: return o.binary_rep() elif type(o) == IntConst: return str(o) else: return "["+", ".join(prettyprint(e) for e in o)+"]" def testdomain(name, abs_domain): print("\n"+name) c = variable('c') my_set = (((c == BitvecConst.from_string("0b00000011")) | (c == BitvecConst.from_string("0b10001000"))) & (c != BitvecConst.from_string("0b10001001"))) best_rep = best_abstract_representation_of_set(my_set, abs_domain.bottom, abs_domain) #print("best representation of "+str(my_set)+" is "+prettyprint(best_rep)) c0_pre = variable('c0_pre') c1_pre = variable('c1_pre') c_post = variable('c_post') abs_input = BitvecConst.from_string("0b01000011") spec = (c_post == (c0_pre | BitvecConst.from_string("0b10010100"))) best_abstract_output(spec, [abs_input], abs_domain) spec = (c_post == (c0_pre >> BitvecConst.from_string("0b00000100") | c0_pre)) best_abstract_output(spec, [abs_input], abs_domain) abs_input = BitvecConst.from_string("0b01010101") best_abstract_output(spec, [abs_input], abs_domain) abs_input = BitvecConst.from_string("0b00100100") best_abstract_output(spec, [abs_input], abs_domain) best_abstract_input(spec, abs_input, abs_domain) abs_input1 = BitvecConst.from_string("0b00100100") abs_input2 = BitvecConst.from_string("0b00000011") spec_diff = (c_post == (c0_pre - c1_pre)) best_abstract_output(spec_diff, [abs_input1, abs_input2], abs_domain) spec_sum = (c_post == (c0_pre + c1_pre)) best_abstract_output(spec_sum, [abs_input1, abs_input2], abs_domain) def testdomainpairs(name, abs_domain): print("\n"+name) c = variable('c') my_set = (((c == BitvecConst.from_string("0b00000011")) | (c == BitvecConst.from_string("0b10001000"))) & (c != BitvecConst.from_string("0b10001001"))) #(correct, counterexample) = overapproximates(my_set, BitvecConst.from_string("0b11000011"), abs_domain) #print("true") if correct else print("false: "+counterexample.binary_rep() + " not covered") #(correct, counterexample) = overapproximates(my_set, BitvecConst.from_string("0b11101011"), abs_domain) #print("true") if correct else print("false: "+counterexample.binary_rep() + " not covered") best_rep = best_abstract_representation_of_set(my_set, abs_domain.bottom, abs_domain) print("best representation of "+str(my_set)+" is "+prettyprint(best_rep)) c0_pre = variable('c0_pre') c1_pre = variable('c1_pre') c_post = variable('c_post') abs_input = [BitvecConst.from_string("0b01000011"), BitvecConst.from_string("0b01000011")] spec = (c_post == (c0_pre | BitvecConst.from_string("0b10010100"))) best_abstract_output(spec, [abs_input], abs_domain) spec = (c_post == (c0_pre >> BitvecConst.from_string("0b00000010") | c0_pre)) best_abstract_output(spec, [abs_input], abs_domain) abs_input = [BitvecConst.from_string("0b01010101"), BitvecConst.from_string("0b01010101")] best_abstract_output(spec, [abs_input], abs_domain) abs_input = [BitvecConst.from_string("0b00100100"), BitvecConst.from_string("0b00100100")] best_abstract_output(spec, [abs_input], abs_domain) abs_input1 = [BitvecConst.from_string("0b00100100"), BitvecConst.from_string("0b00100100")] abs_input2 = [BitvecConst.from_string("0b00010011"), BitvecConst.from_string("0b00011011")] #contradictory for intervals spec_diff = (c_post == (c0_pre - c1_pre)) best_abstract_output(spec_diff, [abs_input1, abs_input2], abs_domain) spec_sum = (c_post == (c0_pre + c1_pre)) best_abstract_output(spec_sum, [abs_input1, abs_input2], abs_domain) if __name__ == "__main__": os.system('clear') verbose = False #Definite ones and zeroes domains: w = 8 variable = lambda name: BitvecVar(name, w) bottom = BitvecConst.from_string("0b00000000") top = BitvecConst.from_string("0b11111111") conc_relation = lambda c, a: ((variable(c) | variable(a)) == variable(a)) join = lambda x, y: BitvecConst(eval((x | y).python_expr([])), w) representation = lambda value: value abs_domain_definite_zero = AbstractDomain(variable, bottom, top, join, conc_relation, representation) testdomain("definite zeroes: ", abs_domain_definite_zero) conc_relation_definite_one = lambda c, a: ((variable(c) | variable(a)) == variable(c)) meet = lambda x, y: BitvecConst(eval((x & y).python_expr([])), w) abs_domain_definite_one = AbstractDomain(variable, top, bottom, meet, conc_relation_definite_one, representation) testdomain("definite ones: ", abs_domain_definite_one) abs_domain_combination = ReducedProduct(abs_domain_definite_zero, abs_domain_definite_one) testdomainpairs("reduced product of definite ones and definite zeroes: ", abs_domain_combination) #Upper and lower bounds domains: conc_relation_leq = lambda c, a: (variable(c) <= variable(a)) join_leq = lambda x, y: BitvecConst(eval(Formula.ite(x <= y, y, x).python_expr([])), w) abs_domain_leq = AbstractDomain(variable, bottom, top, join_leq, conc_relation_leq, representation) testdomain("upper bound domain: ", abs_domain_leq) conc_relation_geq = lambda c, a: (variable(a) <= variable(c)) join_geq = lambda x, y: BitvecConst(eval(Formula.ite(x <= y, x, y).python_expr([])), w) abs_domain_geq = AbstractDomain(variable, top, bottom, join_geq, conc_relation_geq, representation) testdomain("lower bound domain: ", abs_domain_geq) #... whose reduced product is an interval domain: abs_domain_intervals = ReducedProduct(abs_domain_geq, abs_domain_leq) testdomainpairs("reduced product of lower and upper bounds = interval domain: ", abs_domain_intervals)