#!/usr/bin/python
# BayesCards.py demos Bayes' rule (conditional probability) with a card deck.
# Initial code 2017; hypothesis and evidence may be functions as of 10/2019.
# Update 10/26/2022 to handle multiple evidence items in a tuple or list
# for demonstrating failed statistical independence in cprob().

import random
import copy
import types

# Suit name -> card color lookup for a standard deck.
SuitToColor = dict(
    hearts="red",
    diamonds="red",
    clubs="black",
    spades="black",
)
# The distinct colors that occur in the deck.
colors = {color for color in SuitToColor.values()}

# Card values as strings: "2" through "10", then the face cards and the ace.
values = list(map(str, range(2, 11)))
values.extend(["jack", "queen", "king", "ace"])

# Most recent hand dealt; a global in case deal's result is not assigned.
hand = None

# The full deck: one (value, suit, color) tuple per value/suit combination,
# ordered by value first and then by suit.
deck = [(value, suit, color)
        for value in values
        for suit, color in SuitToColor.items()]
# Separate list holding the same cards; deal() rebuilds it on every call.
deckCopy = list(deck)

print("There are", len(deck), "cards in the deck.")

def deal(count):
    '''
    Deal count cards at random, without replacement, from a fresh copy
    of the full deck.

    Returns the list of dealt cards.  The same list is also stored in the
    global hand, and the cards that were not dealt remain in the global
    deckCopy.  Raises ValueError when count exceeds the size of the deck.
    '''
    global deckCopy, hand
    deckCopy = list(deck)  # Start from a full deck on every call.
    if count > len(deckCopy):
        raise ValueError("ERROR, count " + str(count) + " is too high.")
    dealt = []
    for _ in range(count):
        pick = random.randrange(0, len(deckCopy))
        # pop() removes the chosen card so it cannot be dealt twice.
        dealt.append(deckCopy.pop(pick))
    # Mirror the result in a global in case the caller drops the return value.
    hand = dealt
    return dealt

def _matches(test, card):
    '''
    Return True when card satisfies test.  A callable test is applied to
    the card as a predicate; any other value is looked up in the card with
    the "in" operator.
    '''
    if callable(test):
        return bool(test(card))
    return test in card


def cprob(thehand, hypothesis, evidence):
    '''
    Return P(hypothesis | evidence) measured over the cards in thehand,
    computed with Bayes' rule:  P(H | E) = (P(E | H) / P(E)) x P(H).

    hypothesis and evidence may each be a value to look for inside a card
    (for example "hearts") or a predicate that takes a card and returns a
    truth value.  Any callable is accepted as a predicate.

    evidence may also be a list or tuple of such tests, which are treated
    as if they were statistically independent: each one is evaluated on
    its own and the product of the separate results is returned.

    Returns a float, or None when the probability cannot be computed:
    P(E) is 0, thehand is empty, or any item of an evidence list/tuple
    yields None.  Returns 0.0 when P(H) is 0 and P(E) is not.
    Diagnostic state is printed to standard output.
    '''
    if isinstance(evidence, (list, tuple)):
        product = 1.0
        undefined = False
        for e in evidence:
            # Evaluate each item exactly once and reuse the value for both
            # the running product and the report line.
            partial = cprob(thehand, hypothesis, e)
            print("cprob(thehand,",hypothesis,"given",e,"=",partial)
            if partial is None:
                # One undefined factor makes the whole product undefined;
                # keep going so every item is still reported.
                undefined = True
            else:
                product *= partial
        result = None if undefined else product
        print("\nPRODUCT OF SEPARATE PER-EVIDENCE RUNS", result)
        return result
    total = len(thehand)
    if total == 0:
        # No cards means P(E) and P(H) are undefined; avoid dividing by 0.
        print("Cannot compute P(H | E) with an empty hand.\n")
        return None
    # https://brilliant.org/wiki/bayes-theorem/
    # P(H | E) = (P(E | H) / P(E)) x P(H)
    PHC = 0.0    # count of cards matching the hypothesis
    PEC = 0.0    # count of cards matching the evidence
    BOTHC = 0.0  # count of cards matching both; numerator of the
                 # measured P(H | E) and of the measured P(E | H)
    for card in thehand:
        isH = _matches(hypothesis, card)
        isE = _matches(evidence, card)
        if isE:
            PEC += 1
        if isH:
            PHC += 1
        if isH and isE:
            BOTHC += 1
    PE = PEC / total
    PH = PHC / total
    print('\nSTATE DUMP FOR HYPOTHESIS', hypothesis,
        'GIVEN EVIDENCE', evidence)
    print("Out of", total, "cards, PE =", PE, ", PH = ", PH)
    result = 0.0
    # Conditional expressions; the C/C++/Java form would be
    # PEH = (PHC > 0) ? BOTHC/PHC : None
    PEH = BOTHC/PHC if PHC > 0 else None
    PHE = BOTHC/PEC if PEC > 0 else None
    if PHC == 0:
        print("P(H) of 0 gives multiplier of 0, tentative result of 0.\n")
        result = 0.0
    if PEC == 0:
        # Overrides the tentative 0 above: dividing by P(E) is impossible.
        print("Cannot compute P(H | E) with P(E) of 0.\n")
        result = None
    if PHC > 0 and PEC > 0:
        result = (PEH / PE) * PH
        print("Measured P(E | H) =",PEH,",measured P(H | E) =", PHE,
            ", computed P(H | E) =", result, '\n')
    return result

from random import Random
# gendata() stores its most recent result in this global.
genhand = None
def gendata(numbercolumns, numberrows, maxvalue, correlator):
    '''
    Build numberrows rows of numbercolumns strings and return them as a
    list of lists; the same list is also stored in the global genhand.

    The first numbercolumns-1 entries of a row are a column letter
    ('a' for column 0, 'b' for column 1, ...) followed by a random int:
    from [0, maxvalue) for column 0, from [0, 2*maxvalue) in steps of 2
    for column 1, and so on.  The last entry is 'T' followed by whatever
    correlator(numbercolumns, numberrows, maxvalue) returns.
    '''
    global genhand
    rng = Random()  # unseeded at present
    table = []
    for _ in range(numberrows):
        record = [None] * numbercolumns
        for position in range(numbercolumns - 1):
            stride = position + 1
            letter = chr(ord('a') + position)
            number = rng.randrange(0, stride * maxvalue, step=stride)
            record[position] = letter + str(number)
        # The target attribute always occupies the last column.
        target = correlator(numbercolumns, numberrows, maxvalue)
        record[-1] = 'T' + str(target)
        table.append(record)
    genhand = table
    return table

def mycollerator(numbercolumns, numberrows, maxvalue):
    '''
    Return a random even int from [0, numbercolumns*maxvalue).
    numberrows is accepted but not used.
    '''
    upper = numbercolumns * maxvalue
    generator = Random()  # unseeded at present
    return generator.randrange(0, upper, step=2)

# Build a 20-row, 4-column table (maxvalue 4) when the module is loaded.  The
# return value is discarded, so the rows are reachable through genhand.
gendata(4, 20, 4, mycollerator)

def evi(row):
    # Evidence predicate: row contains both 'a2' and 'c3'.
    return all(tag in row for tag in ('a2', 'c3'))

def hypo(row):
    # Hypothesis predicate: the last character of the last column
    # is '2' or '3'.
    target = row[-1]
    return target[-1] in ('2', '3')