贪婪算法
背景知识
假如有10台老虎机,其中每台老虎机中奖的概率都不相同,只能玩5000次,如何获取最大的收益?
代码实现
import numpy as np
import random
# pi[x]代表每台老虎机中奖概率
pi = [0.1,0.2,0.4,0.8,0.5,0.6,0.3,0.6,0.7,0.1]
pi_reward = [[1] for _ in range(10)]
def play_one():
count = sum([len(i) for i in pi_reward])
# 前期多探索,后期多利用
if random.random()<1/count:
count +=1
return random.randint(0,9)
return np.argmax([sum(i)/len(i) for i in pi_reward])
def tryAndplay():
index = play_one()
if random.random()<pi[index]:
pi_reward[index].append(1)
else:
pi_reward[index].append(0)
for i in range(5000):
tryAndplay()
target = max(pi)*5000
result = sum([sum(i) for i in pi_reward])
print(target)
print(result)