raising unlimited preflop with max_raising_rounds=100, max_steps_after_raiser=100
dickreuter · Feb 16, 2024 · 443b925 · 443b925
1 parent 1af9d9a
commit 443b925
Show file tree

Hide file tree

Showing 6 changed files with 66 additions and 14 deletions.
diff --git a/.gitignore b/.gitignore
@@ -27,3 +27,5 @@ Graph/
 /EquityCalculatorMontecarlo.pdb
 /EquityCalculatorMontecarlo.pyd
 /python37.dll
+build/*
+.vscode/
diff --git a/.pylintrc b/.pylintrc
@@ -438,7 +438,6 @@ disable=raw-checker-failed,
         too-many-return-statements,
         too-few-public-methods,
         too-many-instance-attributes,
-        superfluos-parens,
         fixme,
         arguments-differ,
         too-many-branches,
@@ -448,6 +447,13 @@ disable=raw-checker-failed,
         too-many-locals,
         superfluous-parens,
         useless-option-value,
+        duplicate-code,
+        raise-missing-from,
+        inconsistent-return-statements,
+        unspecified-encoding,
+        too-many-statements,
+        consider-using-in,
+
 
 
 

diff --git a/agents/agent_keras_rl_dqn.py b/agents/agent_keras_rl_dqn.py
@@ -1,5 +1,5 @@
 """Player based on a trained neural network"""
-# pylint: disable=wrong-import-order
+# pylint: disable=wrong-import-order,invalid-name
 import logging
 import time
 

diff --git a/gym_env/env.py b/gym_env/env.py
@@ -89,7 +89,8 @@ class HoldemTable(Env):
     """Pokergame environment"""
 
     def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False, funds_plot=True,
-                 max_raising_rounds=2, use_cpp_montecarlo=False):
+                 max_raising_rounds=2, use_cpp_montecarlo=False,
+                 max_steps_after_raiser=None):
         """
         The table needs to be initialized once at the beginning
 
@@ -101,6 +102,8 @@ def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False,
             render (bool): render table after each move in graphical format
             funds_plot (bool): show plot of funds history at end of each episode
             max_raising_rounds (int): max raises per round per player
+            max_steps_after_raiser (int): max steps after raiser to end round. If None it will default to len(players) - 1
+            use_cpp_montecarlo (bool): use cpp montecarlo for equity calculation
 
         """
         if use_cpp_montecarlo:
@@ -125,7 +128,7 @@ def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False,
         self.last_player_pot = None
         self.viewer = None
         self.player_max_win = None  # used for side pots
-        self.second_round = False
+        self.round_number = 0
         self.last_caller = None
         self.last_raiser = None
         self.raisers = []
@@ -159,6 +162,8 @@ def __init__(self, initial_stacks=100, small_blind=1, big_blind=2, render=False,
         self.action_space = Discrete(len(Action) - 2)
         self.first_action_for_hand = None
 
+        self.initial_max_steps_after_raiser = max_steps_after_raiser
+
     def reset(self):
         """Reset after game over."""
         self.observation = None
@@ -177,7 +182,10 @@ def reset(self):
             player.stack = self.initial_stacks
 
         self.dealer_pos = 0
-        self.player_cycle = PlayerCycle(self.players, dealer_idx=-1, max_steps_after_raiser=len(self.players) - 1,
+        if not self.initial_max_steps_after_raiser:
+            self.initial_max_steps_after_raiser = len(self.players) - 1
+        self.player_cycle = PlayerCycle(self.players, dealer_idx=-1,
+                                        max_steps_after_raiser=self.initial_max_steps_after_raiser,
                                         max_steps_after_big_blind=len(self.players))
         self._start_new_hand()
         self._get_environment()
@@ -393,7 +401,7 @@ def _process_decision(self, action):  # pylint: disable=too-many-statements
             self.player_max_win[self.current_player.seat] += contribution  # side pot
 
             pos = self.player_cycle.idx
-            rnd = self.stage.value + self.second_round
+            rnd = self.stage.value + self.round_number
             self.stage_data[rnd].calls[pos] = action == Action.CALL
             self.stage_data[rnd].raises[pos] = action in [Action.RAISE_2POT, Action.RAISE_HALF_POT, Action.RAISE_POT]
             self.stage_data[rnd].min_call_at_action[pos] = self.min_call / (self.big_blind * 100)
@@ -750,7 +758,7 @@ def __init__(self, lst, start_idx=0, dealer_idx=0, max_steps_total=None,
         self.last_raiser = None
         self.step_counter = 0
         self.steps_for_blind_betting = 2
-        self.second_round = False
+        self.round_number = 0
         self.idx = 0
         self.dealer_idx = dealer_idx
         self.can_still_make_moves_in_this_hand = []  # if the player can still play in this round
@@ -771,7 +779,7 @@ def new_hand_reset(self):
     def new_round_reset(self):
         """Reset the state for the next stage: flop, turn or river"""
         self.step_counter = 0
-        self.second_round = False
+        self.round_number = 0
         self.idx = self.dealer_idx
         self.last_raiser_step = len(self.lst)
         self.checkers = 0
@@ -786,7 +794,7 @@ def next_player(self, step=1):
         self.step_counter += step
         self.idx %= len(self.lst)
         if self.step_counter > len(self.lst):
-            self.second_round = True
+            self.round_number += 1
         if self.max_steps_total and (self.step_counter >= self.max_steps_total):
             log.debug("Max steps total has been reached")
             return False
@@ -868,7 +876,7 @@ def mark_out_of_cash_but_contributed(self):
     def mark_bb(self):
         """Ensure bb can raise"""
         self.last_raiser_step = self.step_counter + len(self.lst)
-        self.max_steps_total = self.step_counter + len(self.lst) * 2
+        # self.max_steps_total = self.step_counter + len(self.lst) * 2
 
     def is_raising_allowed(self):
         """Check if raising is still allowed at this position"""

diff --git a/tests/test_gym_env.py b/tests/test_gym_env.py
@@ -4,9 +4,16 @@
 from gym_env.env import HoldemTable, Action, Stage, PlayerCycle
 
 
-def _create_env(n_players):
+def _create_env(n_players,
+                initial_stacks=100, small_blind=1, big_blind=2, render=False, funds_plot=True,
+                max_raising_rounds=2, max_steps_after_raiser=None,
+                use_cpp_montecarlo=False):
     """Create an environment"""
-    env = HoldemTable()
+    env = HoldemTable(small_blind=small_blind, big_blind=big_blind, initial_stacks=initial_stacks,
+                      max_raising_rounds=max_raising_rounds,
+                      max_steps_after_raiser=max_steps_after_raiser,
+                      use_cpp_montecarlo=use_cpp_montecarlo)
+
     for _ in range(n_players):
         player = PlayerForTest()
         env.add_player(player)
@@ -34,7 +41,7 @@ def test_basic_actions_with_6_player():
     assert env.players[2].stack == 98
     assert env.stage == Stage.PREFLOP
     env.step(Action.RAISE_POT)  # big blind raises
-    assert env.player_cycle.second_round
+    assert env.player_cycle.round_number
     env.step(Action.FOLD)  # utg
     env.step(Action.CALL)  # 4 only remaining player calls
     assert env.stage == Stage.FLOP
@@ -256,3 +263,32 @@ def test_call_proper_amount():
     assert env.stage_data[0].contribution[0] == 0.03
     assert env.stage_data[0].contribution[1] == 0.01
     assert env.stage_data[0].contribution[2] == 0.03
+
+
+def test_unlimited_raising_preflop():
+    """Test raising unlimited preflop"""
+    env = _create_env(2, initial_stacks=100000, max_raising_rounds=100, max_steps_after_raiser=100)
+    env.step(Action.CALL)  # seat 3 utg
+    env.step(Action.RAISE_POT)  # seat 4
+    env.step(Action.CALL)  # seat 0 dealer
+    env.step(Action.RAISE_POT)  # seat 1 small blind
+    env.step(Action.CALL)  # seat 2 big blind
+    assert env.stage == Stage.PREFLOP
+    env.step(Action.CALL)  # seat 3 utg
+    env.step(Action.RAISE_POT)  # seat 4
+    env.step(Action.CALL)  # seat 0 dealer
+    env.step(Action.RAISE_POT)  # seat 1 small blind
+    env.step(Action.CALL)  # seat 2 big blind
+    assert env.stage == Stage.PREFLOP
+    env.step(Action.CALL)  # seat 3 utg
+    env.step(Action.RAISE_POT)  # seat 4
+    env.step(Action.CALL)  # seat 0 dealer
+    env.step(Action.RAISE_POT)  # seat 1 small blind
+    env.step(Action.CALL)  # seat 2 big blind
+    assert env.stage == Stage.PREFLOP
+    env.step(Action.CALL)  # seat 3 utg
+    env.step(Action.RAISE_POT)  # seat 4
+    env.step(Action.CALL)  # seat 0 dealer
+    env.step(Action.RAISE_POT)  # seat 1 small blind
+    env.step(Action.CALL)  # seat 2 big blind
+    assert env.stage == Stage.PREFLOP
diff --git a/tools/hand_evaluator.py b/tools/hand_evaluator.py
@@ -114,6 +114,6 @@ def _calc_score(hand):
         hand_type = "HighCard"
         card_ranks = card_ranks[:5]
     else:
-        raise Exception('Card Type error!')
+        raise Exception('Card Type error!')  # pylint: disable=broad-exception-raised
 
     return score, card_ranks, hand_type