#!/usr/bin/python
# BayesCards.py demos Bayes' rule (conditional probability) with a card deck.
# Initial code 2017; hypothesis and evidence may be functions as of 10/2019.
# Update 10/26/2022 to handle multiple evidence items in a tuple or list
# for demonstrating failed statistical independence in cprob().

import copy
import random
import types
from random import Random

SuitToColor = {
    "hearts": "red",
    "diamonds": "red",
    "clubs": "black",
    "spades": "black",
}
colors = set(SuitToColor.values())
values = [str(v) for v in range(2, 11)]     # 2 through 10 as strings
values = values + ["jack", "queen", "king", "ace"]
hand = None     # I am using a global in case I forget to assign deal's result.

# Each card is a (value, suit, color) tuple; the deck is ordered by value,
# then by suit in SuitToColor's insertion order.
deck = [(value, suit, color)
        for value in values
        for suit, color in SuitToColor.items()]
deckCopy = copy.copy(deck)
print("There are", len(deck), "cards in the deck.")


def deal(count):
    """Deal count distinct cards at random from a fresh copy of the deck.

    The dealt cards are returned and also stored in the global ``hand``;
    the undealt remainder is left in the global ``deckCopy``.

    Raises:
        ValueError: if count exceeds the number of cards in the deck.
    """
    global deckCopy
    global hand
    deckCopy = copy.copy(deck)  # In case we called deal before
    if count > len(deckCopy):
        raise ValueError("ERROR, count " + str(count) + " is too high.")
    result = []
    for _ in range(count):
        # Removing the drawn card guarantees sampling without replacement.
        result.append(deckCopy.pop(random.randrange(0, len(deckCopy))))
    # I am using a global in case I forget to assign deal's result.
    hand = result
    return result


def _matches(test, card):
    """Return True when card satisfies test.

    A callable test is applied to the card as a predicate; any other
    test value matches when it is a member of the card.
    """
    if callable(test):
        return bool(test(card))
    return test in card


def cprob(thehand, hypothesis, evidence):
    """Return P(hypothesis | evidence) measured over the cards in thehand.

    hypothesis and evidence may each be a value to look for in a card
    (e.g. "king", "red") or a predicate function taking a card.

    Updated 10/26/2022 for the case where evidence is a list or tuple of
    evidence tests assumed to be statistically independent: each test is
    run separately and the product of the per-test results is returned.

    Returns:
        The probability as a float; 0.0 when no card matches the
        hypothesis; None when the probability cannot be computed (empty
        hand, no card matches the evidence, or any item of a list/tuple
        of evidence yields None).
    """
    if isinstance(evidence, (list, tuple)):
        product = 1.0
        for e in evidence:
            # Evaluate once and reuse the value for both the report line
            # and the running product.
            single = cprob(thehand, hypothesis, e)
            print("cprob(thehand,", hypothesis, "given", e, "=", single)
            if single is None or product is None:
                # An uncomputable term makes the whole product uncomputable.
                product = None
            else:
                product = product * single
        print("\nPRODUCT OF SEPARATE PER-EVIDENCE RUNS", product)
        return product

    # https://brilliant.org/wiki/bayes-theorem/
    # P(H | E) = (P(E | H) / P(E)) x P(H)
    total = len(thehand)
    if total == 0:
        print("Cannot compute P(H | E) for an empty hand.\n")
        return None

    PHC = 0     # number of cards matching the hypothesis
    PEC = 0     # number of cards matching the evidence
    bothC = 0   # number of cards matching both hypothesis and evidence
    for card in thehand:
        is_e = _matches(evidence, card)
        is_h = _matches(hypothesis, card)
        if is_e:
            PEC += 1
        if is_h:
            PHC += 1
        if is_e and is_h:
            bothC += 1

    PE = PEC / total
    PH = PHC / total
    print('\nSTATE DUMP FOR HYPOTHESIS', hypothesis,
          'GIVEN EVIDENCE', evidence)
    print("Out of", total, "cards, PE =", PE, ", PH = ", PH)
    PEH = bothC / PHC if PHC > 0 else None  # measured P(E | H)
    PHE = bothC / PEC if PEC > 0 else None  # measured P(H | E)

    result = 0.0
    if PHC == 0:
        print("P(H) of 0 gives multiplier of 0, tentative result of 0.\n")
        result = 0.0
    if PEC == 0:
        # Takes precedence over the P(H) == 0 case above.
        print("Cannot compute P(H | E) with P(E) of 0.\n")
        result = None
    if PHC > 0 and PEC > 0:
        result = (PEH / PE) * PH
        print("Measured P(E | H) =", PEH, ",measured P(H | E) =", PHE,
              ", computed P(H | E) =", result, '\n')
    return result


genhand = None


def gendata(numbercolumns, numberrows, maxvalue, correlator):
    """Generate numberrows rows of numbercolumns string attributes.

    correlator is a function, called once per row with
    (numbercolumns, numberrows, maxvalue), whose result becomes the
    target attribute in the last column, prefixed with 'T'.  The
    numbercolumns-1 non-target attributes are a letter ('a' for column
    0, 'b' for column 1, ...) followed by a random int: from
    [0, maxvalue) for column 0, [0, maxvalue*2 by 2's) for column 1,
    etc.

    The generated rows are returned and also stored in the global
    ``genhand``.

    Raises:
        ValueError: if numbercolumns is less than 1 (there must be at
            least the target column).
    """
    global genhand
    if numbercolumns < 1:
        raise ValueError("ERROR, numbercolumns " + str(numbercolumns)
                         + " is too low.")
    rgen = Random()     # no seed at present
    result = []
    for _ in range(numberrows):
        row = []
        for col in range(numbercolumns - 1):
            offset = col + 1
            # Step is passed positionally; the random docs advise against
            # keyword arguments to randrange.
            row.append(chr(ord('a') + col)
                       + str(rgen.randrange(0, offset * maxvalue, offset)))
        row.append('T' + str(correlator(numbercolumns, numberrows, maxvalue)))
        result.append(row)
    genhand = result
    return result


def mycollerator(numbercolumns, numberrows, maxvalue):
    """Return a random even int in [0, numbercolumns*maxvalue).

    Sample correlator for gendata(); numberrows is accepted but unused.
    """
    rgen = Random()     # no seed at present
    return rgen.randrange(0, numbercolumns * maxvalue, 2)


gendata(4, 20, 4, mycollerator)


def evi(row):
    """Evidence predicate: the row contains both 'a2' and 'c3'."""
    return ('a2' in row) and ('c3' in row)


def hypo(row):
    """Hypothesis predicate: the target (last) value ends in '2' or '3'."""
    return row[-1][-1] in ['2', '3']