Skip to content

Commit

Permalink
raising unlimited preflop with max_raising_rounds=100, max_steps_after_raiser=100
Browse files Browse the repository at this point in the history
  • Loading branch information
dickreuter committed Feb 16, 2024
1 parent 1af9d9a commit 443b925
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 14 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,5 @@ Graph/
/EquityCalculatorMontecarlo.pdb
/EquityCalculatorMontecarlo.pyd
/python37.dll
build/*
.vscode/
8 changes: 7 additions & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -438,7 +438,6 @@ disable=raw-checker-failed,
too-many-return-statements,
too-few-public-methods,
too-many-instance-attributes,
superfluos-parens,
fixme,
arguments-differ,
too-many-branches,
Expand All @@ -448,6 +447,13 @@ disable=raw-checker-failed,
too-many-locals,
superfluous-parens,
useless-option-value,
duplicate-code,
raise-missing-from,
inconsistent-return-statements,
unspecified-encoding,
too-many-statements,
consider-using-in,




Expand Down
2 changes: 1 addition & 1 deletion agents/agent_keras_rl_dqn.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
"""Player based on a trained neural network"""
# pylint: disable=wrong-import-order
# pylint: disable=wrong-import-order,invalid-name
import logging
import time

Expand Down
24 changes: 16 additions & 8 deletions gym_env/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,8 @@ class HoldemTable(Env):
"""Pokergame environment"""

def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False, funds_plot=True,
max_raising_rounds=2, use_cpp_montecarlo=False):
max_raising_rounds=2, use_cpp_montecarlo=False,
max_steps_after_raiser=None):
"""
The table needs to be initialized once at the beginning
Expand All @@ -101,6 +102,8 @@ def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False,
render (bool): render table after each move in graphical format
funds_plot (bool): show plot of funds history at end of each episode
max_raising_rounds (int): max raises per round per player
max_steps_after_raiser (int): max number of steps after the last raiser before the round ends. If None, it defaults to len(players) - 1
use_cpp_montecarlo (bool): use cpp montecarlo for equity calculation
"""
if use_cpp_montecarlo:
Expand All @@ -125,7 +128,7 @@ def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False,
self.last_player_pot = None
self.viewer = None
self.player_max_win = None # used for side pots
self.second_round = False
self.round_number = 0
self.last_caller = None
self.last_raiser = None
self.raisers = []
Expand Down Expand Up @@ -159,6 +162,8 @@ def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False,
self.action_space = Discrete(len(Action) - 2)
self.first_action_for_hand = None

self.initial_max_steps_after_raiser = max_steps_after_raiser

def reset(self):
"""Reset after game over."""
self.observation = None
Expand All @@ -177,7 +182,10 @@ def reset(self):
player.stack = self.initial_stacks

self.dealer_pos = 0
self.player_cycle = PlayerCycle(self.players, dealer_idx=-1, max_steps_after_raiser=len(self.players) - 1,
if not self.initial_max_steps_after_raiser:
self.initial_max_steps_after_raiser = len(self.players) - 1
self.player_cycle = PlayerCycle(self.players, dealer_idx=-1,
max_steps_after_raiser=self.initial_max_steps_after_raiser,
max_steps_after_big_blind=len(self.players))
self._start_new_hand()
self._get_environment()
Expand Down Expand Up @@ -393,7 +401,7 @@ def _process_decision(self, action): # pylint: disable=too-many-statements
self.player_max_win[self.current_player.seat] += contribution # side pot

pos = self.player_cycle.idx
rnd = self.stage.value + self.second_round
rnd = self.stage.value + self.round_number
self.stage_data[rnd].calls[pos] = action == Action.CALL
self.stage_data[rnd].raises[pos] = action in [Action.RAISE_2POT, Action.RAISE_HALF_POT, Action.RAISE_POT]
self.stage_data[rnd].min_call_at_action[pos] = self.min_call / (self.big_blind * 100)
Expand Down Expand Up @@ -750,7 +758,7 @@ def __init__(self, lst, start_idx=0, dealer_idx=0, max_steps_total=None,
self.last_raiser = None
self.step_counter = 0
self.steps_for_blind_betting = 2
self.second_round = False
self.round_number = 0
self.idx = 0
self.dealer_idx = dealer_idx
self.can_still_make_moves_in_this_hand = [] # if the player can still play in this round
Expand All @@ -771,7 +779,7 @@ def new_hand_reset(self):
def new_round_reset(self):
"""Reset the state for the next stage: flop, turn or river"""
self.step_counter = 0
self.second_round = False
self.round_number = 0
self.idx = self.dealer_idx
self.last_raiser_step = len(self.lst)
self.checkers = 0
Expand All @@ -786,7 +794,7 @@ def next_player(self, step=1):
self.step_counter += step
self.idx %= len(self.lst)
if self.step_counter > len(self.lst):
self.second_round = True
self.round_number += 1
if self.max_steps_total and (self.step_counter >= self.max_steps_total):
log.debug("Max steps total has been reached")
return False
Expand Down Expand Up @@ -868,7 +876,7 @@ def mark_out_of_cash_but_contributed(self):
def mark_bb(self):
"""Ensure bb can raise"""
self.last_raiser_step = self.step_counter + len(self.lst)
self.max_steps_total = self.step_counter + len(self.lst) * 2
# self.max_steps_total = self.step_counter + len(self.lst) * 2

def is_raising_allowed(self):
"""Check if raising is still allowed at this position"""
Expand Down
42 changes: 39 additions & 3 deletions tests/test_gym_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,16 @@
from gym_env.env import HoldemTable, Action, Stage, PlayerCycle


def _create_env(n_players):
def _create_env(n_players,
initial_stacks=100, small_blind=1, big_blind=2, render=False, funds_plot=True,
max_raising_rounds=2, max_steps_after_raiser=None,
use_cpp_montecarlo=False):
"""Create an environment"""
env = HoldemTable()
env = HoldemTable(small_blind=small_blind, big_blind=big_blind, initial_stacks=initial_stacks,
max_raising_rounds=max_raising_rounds,
max_steps_after_raiser=max_steps_after_raiser,
use_cpp_montecarlo=use_cpp_montecarlo)

for _ in range(n_players):
player = PlayerForTest()
env.add_player(player)
Expand Down Expand Up @@ -34,7 +41,7 @@ def test_basic_actions_with_6_player():
assert env.players[2].stack == 98
assert env.stage == Stage.PREFLOP
env.step(Action.RAISE_POT) # big blind raises
assert env.player_cycle.second_round
assert env.player_cycle.round_number
env.step(Action.FOLD) # utg
env.step(Action.CALL) # 4 only remaining player calls
assert env.stage == Stage.FLOP
Expand Down Expand Up @@ -256,3 +263,32 @@ def test_call_proper_amount():
assert env.stage_data[0].contribution[0] == 0.03
assert env.stage_data[0].contribution[1] == 0.01
assert env.stage_data[0].contribution[2] == 0.03


def test_unlimited_raising_preflop():
    """Preflop betting stays open while the players keep re-raising.

    The table is created with max_raising_rounds=100 and
    max_steps_after_raiser=100; after each of four identical
    call/raise sequences the stage must still be PREFLOP.
    """
    env = _create_env(2, initial_stacks=100000, max_raising_rounds=100, max_steps_after_raiser=100)

    # One pass of the betting pattern that is replayed below.
    betting_pattern = (
        Action.CALL,
        Action.RAISE_POT,
        Action.CALL,
        Action.RAISE_POT,
        Action.CALL,
    )

    for _ in range(4):
        for move in betting_pattern:
            env.step(move)
        # The raises must not have pushed the hand on to the flop.
        assert env.stage == Stage.PREFLOP
2 changes: 1 addition & 1 deletion tools/hand_evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,6 @@ def _calc_score(hand):
hand_type = "HighCard"
card_ranks = card_ranks[:5]
else:
raise Exception('Card Type error!')
raise Exception('Card Type error!') # pylint: disable=broad-exception-raised

return score, card_ranks, hand_type

0 comments on commit 443b925

Please sign in to comment.