projectmesa · adamamer20 · Aug 28, 2024 · Aug 21, 2024 · Aug 21, 2024 · Aug 21, 2024
diff --git a/examples/benchmark_plot_0.png b/examples/benchmark_plot_0.png
diff --git a/examples/sugarscape_ig/benchmark_plot_0.png b/examples/sugarscape_ig/benchmark_plot_0.png
diff --git a/examples/sugarscape_ig/benchmark_plot_1.png b/examples/sugarscape_ig/benchmark_plot_1.png
diff --git a/examples/sugarscape_ig/performance_comparison.py b/examples/sugarscape_ig/performance_comparison.py
@@ -3,15 +3,19 @@
 import matplotlib.pyplot as plt
 import numpy as np
 import perfplot
-import seaborn as sns
 from ss_mesa.model import SugarscapeMesa
 from ss_pandas.model import SugarscapePandas
+from ss_polars.model import SugarscapePolars
 
 
 class SugarScapeSetup:
     def __init__(self, n: int):
+        if n >= 10**6:
+            density = 0.17  # FLAME2-GPU
+        else:
+            density = 0.04  # mesa
         self.n = n
-        dimension = math.ceil(5 * math.sqrt(n))
+        dimension = math.ceil(math.sqrt(n / density))
         self.sugar_grid = np.random.randint(0, 4, (dimension, dimension))
         self.initial_sugar = np.random.randint(6, 25, n)
         self.metabolism = np.random.randint(2, 4, n)
@@ -21,13 +25,19 @@ def __init__(self, n: int):
 def mesa_implementation(setup: SugarScapeSetup):
+    """Benchmark kernel: build a SugarscapeMesa model from ``setup`` and call ``run_model(100)``."""
     return SugarscapeMesa(
         setup.n, setup.sugar_grid, setup.initial_sugar, setup.metabolism, setup.vision
-    )
+    ).run_model(100)
 
 
 def mesa_frames_pandas_concise(setup: SugarScapeSetup):
+    """Benchmark kernel: build a SugarscapePandas model from ``setup`` and call ``run_model(100)``."""
     return SugarscapePandas(
         setup.n, setup.sugar_grid, setup.initial_sugar, setup.metabolism, setup.vision
-    )
+    ).run_model(100)
+
+
+def mesa_frames_polars_concise(setup: SugarScapeSetup):
+    """Benchmark kernel: build a SugarscapePolars model from ``setup`` and run it for 100 steps."""
+    model = SugarscapePolars(
+        setup.n, setup.sugar_grid, setup.initial_sugar, setup.metabolism, setup.vision
+    )
+    return model.run_model(100)
 
 
 def plot_and_print_benchmark(labels, kernels, n_range, title, image_path):
@@ -40,10 +50,8 @@ def plot_and_print_benchmark(labels, kernels, n_range, title, image_path):
         equality_check=None,
         title=title,
     )
-
     plt.ylabel("Execution time (s)")
     out.save(image_path)
-
     print("\nExecution times:")
     for i, label in enumerate(labels):
         print(f"---------------\n{label}:")
@@ -53,21 +61,36 @@ def plot_and_print_benchmark(labels, kernels, n_range, title, image_path):
 
 
 def main():
+    # NOTE(review): the triple-quoted string below is used to disable the Mesa
+    # comparison. Because it is the first statement of the function, Python also
+    # stores it as main()'s docstring; '#' comments would avoid that side effect.
+    """# Mesa comparison
     sns.set_theme(style="whitegrid")
-
     labels_0 = [
         "mesa",
-        "mesa-frames (pd concise)",
+        # "mesa-frames (pd concise)",
+        "mesa-frames (pl concise)",
     ]
     kernels_0 = [
         mesa_implementation,
-        mesa_frames_pandas_concise,
+        # mesa_frames_pandas_concise,
+        mesa_frames_polars_concise,
     ]
-    n_range_0 = [k for k in range(0, 100000, 10000)]
+    n_range_0 = [k for k in range(1, 100002, 10000)]
     title_0 = "100 steps of the SugarScape IG model:\n" + " vs ".join(labels_0)
     image_path_0 = "benchmark_plot_0.png"
+    plot_and_print_benchmark(labels_0, kernels_0, n_range_0, title_0, image_path_0)"""
 
-    plot_and_print_benchmark(labels_0, kernels_0, n_range_0, title_0, image_path_0)
+    # FLAME2-GPU comparison
+    # Only the Polars kernel is benchmarked here; the pandas entries are commented out.
+    # n_range_1 evaluates to [1, 1_000_001, 2_000_001, 3_000_001] agents.
+    labels_1 = [
+        # "mesa-frames (pd concise)",
+        "mesa-frames (pl concise)",
+    ]
+    kernels_1 = [
+        # mesa_frames_pandas_concise,
+        mesa_frames_polars_concise,
+    ]
+    n_range_1 = [k for k in range(1, 3 * 10**6 + 2, 10**6)]
+    title_1 = "100 steps of the SugarScape IG model:\n" + " vs ".join(labels_1)
+    image_path_1 = "benchmark_plot_1.png"
+    plot_and_print_benchmark(labels_1, kernels_1, n_range_1, title_1, image_path_1)
 
 
 if __name__ == "__main__":

diff --git a/examples/sugarscape_ig/ss_polars/__init__.py b/examples/sugarscape_ig/ss_polars/__init__.py
diff --git a/examples/sugarscape_ig/ss_polars/agents.py b/examples/sugarscape_ig/ss_polars/agents.py
@@ -0,0 +1,125 @@
+import numpy as np
+import polars as pl
+
+from mesa_frames import AgentSetPolars, ModelDF
+
+
+class AntPolars(AgentSetPolars):
+    """Polars-backed agent set for the Sugarscape IG example.
+
+    Every agent has the columns ``unique_id``, ``sugar``, ``metabolism`` and
+    ``vision``. On each step the agents are shuffled, moved to the best cell
+    within their vision, fed, and discarded once their sugar is not positive.
+    """
+
+    def __init__(
+        self,
+        model: ModelDF,
+        n_agents: int,
+        initial_sugar: np.ndarray | None = None,
+        metabolism: np.ndarray | None = None,
+        vision: np.ndarray | None = None,
+    ):
+        """Create ``n_agents`` agents and add them to the set.
+
+        Parameters
+        ----------
+        model : ModelDF
+            The model the agents belong to. Its ``random`` generator is used
+            for every attribute that is not provided.
+        n_agents : int
+            Number of agents to create.
+        initial_sugar : np.ndarray | None
+            Initial sugar per agent; drawn with ``integers(6, 25, n_agents)`` when ``None``.
+        metabolism : np.ndarray | None
+            Metabolism per agent; drawn with ``integers(2, 4, n_agents)`` when ``None``.
+        vision : np.ndarray | None
+            Vision radius per agent; drawn with ``integers(1, 6, n_agents)`` when ``None``.
+        """
+        super().__init__(model)
+
+        if initial_sugar is None:
+            initial_sugar = model.random.integers(6, 25, n_agents)
+        if metabolism is None:
+            metabolism = model.random.integers(2, 4, n_agents)
+        if vision is None:
+            vision = model.random.integers(1, 6, n_agents)
+
+        # Use the (possibly caller-provided) arrays instead of drawing new random
+        # values, otherwise the arguments would be silently ignored.
+        agents = pl.DataFrame(
+            {
+                "unique_id": pl.arange(n_agents, eager=True),
+                "sugar": initial_sugar,
+                "metabolism": metabolism,
+                "vision": vision,
+            }
+        )
+        self.add(agents)
+
+    def move(self):
+        """Move every agent to the best available cell within its vision.
+
+        Candidate cells are ranked by sugar (highest first) and then by radius
+        (nearest first). When several agents pick the same cell, the agent that
+        comes first in the current agent order gets it and the others fall back
+        to their next best candidate.
+        """
+        neighborhood: pl.DataFrame = self.space.get_neighborhood(
+            radius=self["vision"], agents=self, include_center=True
+        )
+
+        # Join self.space.cells to obtain properties ('sugar') per cell
+        neighborhood = neighborhood.join(self.space.cells, on=["dim_0", "dim_1"])
+
+        # Join self.pos to obtain the agent_id of the center cell
+        # TODO: get_neighborhood/get_neighbors should return 'agent_id_center' instead of center position when input is AgentLike
+        neighborhood = neighborhood.with_columns(
+            agent_id_center=neighborhood.join(
+                self.pos,
+                left_on=["dim_0_center", "dim_1_center"],
+                right_on=["dim_0", "dim_1"],
+            )["unique_id"]
+        )
+
+        # The order in which agents move follows the original order of the agents.
+        # The first agent has order 0 (highest priority).
+        # Only the two columns used below are kept, so the joins do not duplicate
+        # every neighborhood column with a '_right' suffix.
+        # NOTE(review): with_row_count is deprecated in recent Polars releases in
+        # favour of with_row_index - confirm against the pinned Polars version.
+        agent_order = (
+            neighborhood.unique(
+                subset=["agent_id_center"], keep="first", maintain_order=True
+            )
+            .with_row_count("agent_order")
+            .select(["agent_id_center", "agent_order"])
+        )
+
+        neighborhood = neighborhood.join(agent_order, on="agent_id_center")
+
+        # Attach the order of the agent currently occupying each candidate cell.
+        # A left join is required: empty cells have a null 'agent_id', and null keys
+        # never match in an inner join, which would drop every empty cell.
+        neighborhood = neighborhood.join(
+            agent_order.select(
+                pl.col("agent_id_center").alias("agent_id"),
+                pl.col("agent_order").alias("blocking_agent_order"),
+            ),
+            on="agent_id",
+            how="left",
+        )
+
+        # Filter impossible moves: keep empty cells (no blocking agent) and cells whose
+        # occupant does not come after the moving agent in the order.
+        neighborhood = neighborhood.filter(
+            pl.col("blocking_agent_order").is_null()
+            | (pl.col("agent_order") >= pl.col("blocking_agent_order"))
+        )
+
+        # Sort cells by sugar and radius (nearest first)
+        neighborhood = neighborhood.sort(["sugar", "radius"], descending=[True, False])
+
+        best_moves = pl.DataFrame()
+        # While there are agents that do not have a best move, keep looking for one
+        while len(best_moves) < len(self.agents):
+            # Get the best moves for each agent and if duplicates are found, select the one with the highest order
+            new_best_moves = (
+                neighborhood.group_by("agent_id_center", maintain_order=True)
+                .first()
+                .sort("agent_order")
+                .unique(subset=["dim_0", "dim_1"], keep="first")
+            )
+
+            # Agents can make the move if:
+            # - There is no blocking agent
+            # - The agent is in its own cell
+            # - The blocking agent has moved before him
+            condition = pl.col("agent_id").is_null() | (
+                pl.col("agent_id") == pl.col("agent_id_center")
+            )
+            if len(best_moves) > 0:
+                condition = condition | pl.col("agent_id").is_in(
+                    best_moves["agent_id_center"]
+                )
+            new_best_moves = new_best_moves.filter(condition)
+
+            best_moves = pl.concat([best_moves, new_best_moves])
+
+            # Remove agents that have already moved
+            neighborhood = neighborhood.filter(
+                ~pl.col("agent_id_center").is_in(best_moves["agent_id_center"])
+            )
+
+            # Remove cells that have been already selected
+            neighborhood = neighborhood.join(
+                best_moves.select(["dim_0", "dim_1"]), on=["dim_0", "dim_1"], how="anti"
+            )
+
+        # NOTE(review): best_moves rows are in the order the moves were resolved, which
+        # may differ from the order of the agents in self - confirm that move_agents
+        # pairs each position with the intended agent.
+        self.space.move_agents(self, best_moves.select(["dim_0", "dim_1"]))
+
+    def eat(self):
+        """Add the sugar of the occupied cell to each agent and subtract its metabolism."""
+        # Only cells with a non-null 'agent_id' host an agent
+        cells = self.space.cells.filter(pl.col("agent_id").is_not_null())
+        self[cells["agent_id"], "sugar"] = (
+            self[cells["agent_id"], "sugar"]
+            + cells["sugar"]
+            - self[cells["agent_id"], "metabolism"]
+        )
+
+    def step(self):
+        """Shuffle the agents, let them move and eat, then discard agents with ``sugar <= 0``."""
+        self.shuffle().do("move").do("eat")
+        self.discard(self.agents.filter(pl.col("sugar") <= 0))
diff --git a/examples/sugarscape_ig/ss_polars/model.py b/examples/sugarscape_ig/ss_polars/model.py
@@ -0,0 +1,49 @@
+import numpy as np
+import polars as pl
+
+from mesa_frames import GridPolars, ModelDF
+
+from .agents import AntPolars
+
+
+class SugarscapePolars(ModelDF):
+    """Sugarscape IG model backed by a Polars grid (``GridPolars``) and ``AntPolars`` agents."""
+
+    def __init__(
+        self,
+        n_agents: int,
+        sugar_grid: np.ndarray | None = None,
+        initial_sugar: np.ndarray | None = None,
+        metabolism: np.ndarray | None = None,
+        vision: np.ndarray | None = None,
+        width: int | None = None,
+        height: int | None = None,
+    ):
+        """Build the grid, set the sugar of every cell and place the agents on empty cells.
+
+        Parameters
+        ----------
+        n_agents : int
+            Number of agents to create.
+        sugar_grid : np.ndarray | None
+            2D array with the sugar of every cell. When ``None``, a random grid of
+            shape ``(width, height)`` is drawn with ``integers(0, 4)``.
+        initial_sugar, metabolism, vision : np.ndarray | None
+            Per-agent attributes forwarded to ``AntPolars``.
+        width, height : int | None
+            Grid dimensions, only used when ``sugar_grid`` is ``None``.
+
+        Raises
+        ------
+        ValueError
+            If ``sugar_grid`` is ``None`` and ``width`` or ``height`` is missing.
+        """
+        super().__init__()
+        if sugar_grid is None:
+            if width is None or height is None:
+                raise ValueError(
+                    "Either sugar_grid or both width and height must be provided"
+                )
+            sugar_grid = self.random.integers(0, 4, (width, height))
+        grid_dimensions = sugar_grid.shape
+        self.space = GridPolars(
+            self, grid_dimensions, neighborhood_type="von_neumann", capacity=1
+        )
+        dim_0 = pl.Series("dim_0", pl.arange(grid_dimensions[0], eager=True)).to_frame()
+        dim_1 = pl.Series("dim_1", pl.arange(grid_dimensions[1], eager=True)).to_frame()
+        # The cross join enumerates (dim_0, dim_1) pairs; the flattened grid supplies
+        # both the current sugar and the maximum sugar of each cell.
+        flat_sugar = sugar_grid.flatten()
+        cells = dim_0.join(dim_1, how="cross").with_columns(
+            sugar=flat_sugar, max_sugar=flat_sugar
+        )
+        self.space.set_cells(cells)
+        self.agents += AntPolars(self, n_agents, initial_sugar, metabolism, vision)
+        self.space.place_to_empty(self.agents)
+
+    def run_model(self, steps: int) -> None:
+        """Run the model for at most ``steps`` steps, stopping early when no agents are left.
+
+        After every step the sugar of full cells is set to 0 and the sugar of
+        empty cells is reset to their ``max_sugar``.
+        """
+        for _ in range(steps):
+            if len(self.agents) == 0:
+                return
+            self.step()
+            empty_cells = self.space.empty_cells
+            full_cells = self.space.full_cells
+
+            max_sugar = self.space.cells.join(
+                empty_cells, on=["dim_0", "dim_1"]
+            ).select(pl.col("max_sugar"))
+
+            self.space.set_cells(full_cells, {"sugar": 0})
+            self.space.set_cells(empty_cells, {"sugar": max_sugar})
diff --git a/mesa_frames/abstract/space.py b/mesa_frames/abstract/space.py
@@ -756,10 +756,9 @@ def set_cells(
                 )
 
         if properties:
-            properties = obj._df_constructor(
+            cells_df = obj._df_constructor(
                 data=properties, index=self._df_index(cells_df, obj._pos_col_names)
             )
-            cells_df = obj._df_join(cells_df, properties, on=obj._pos_col_names)
 
         if "capacity" in obj._df_column_names(cells_df):
             obj._cells_capacity = obj._update_capacity_cells(cells_df)

diff --git a/mesa_frames/concrete/pandas/mixin.py b/mesa_frames/concrete/pandas/mixin.py
@@ -2,12 +2,12 @@
 from typing import Literal
 
 import numpy as np
-from mesa_frames.abstract.mixin import DataFrameMixin
-from mesa_frames.types_ import DataFrame, PandasMask
-from typing_extensions import Any, overload
-
 import pandas as pd
 import polars as pl
+from typing_extensions import Any, overload
+
+from mesa_frames.abstract.mixin import DataFrameMixin
+from mesa_frames.types_ import DataFrame, PandasMask
 
 
 class PandasMixin(DataFrameMixin):
@@ -117,7 +117,17 @@ def _df_constructor(
         elif isinstance(data, pl.DataFrame):
             df = data.to_pandas()
         else:
-            df = pd.DataFrame(data=data, columns=columns, index=index)
+            # We need to try setting the index after,
+            # otherwise if data contains DF/SRS, the values will not be aligned to the index
+            try:
+                df = pd.DataFrame(data=data, columns=columns)
+                if index is not None:
+                    df.index = index
+            except ValueError as e:
+                if str(e) == "If using all scalar values, you must pass an index":
+                    df = pd.DataFrame(data=data, columns=columns, index=index)
+                else:
+                    raise e
         if dtypes:
             df = df.astype(dtypes)
         if index_cols:
@@ -256,35 +266,40 @@ def _df_join(
         | Literal["cross"] = "left",
         suffix="_right",
     ) -> pd.DataFrame:
+        # Preparing the DFs (setting the join keys as index) speeds up the merge operation
+        # https://stackoverflow.com/questions/40860457/improve-pandas-merge-performance
+        # Sorting the index afterwards was also tried, but it did not improve performance
+        def _prepare_df(df: pd.DataFrame, on: str | list[str] | None) -> pd.DataFrame:
+            """Return ``df`` indexed by ``on`` so the merge can be done on the index."""
+            index = df.index
+            # Nothing to do when the join keys already are the index
+            if index.name == on or index.names == on:
+                return df
+            # A named index that is not the join key is moved back into the columns,
+            # so it is kept in the DataFrame when the new index is set
+            index_is_unnamed = index.name is None and index.names[0] is None
+            if not index_is_unnamed:
+                df = df.reset_index()
+            return df.set_index(on)
+
         left_index = False
         right_index = False
         if on:
             left_on = on
             right_on = on
-        if left.index.name and left.index.name == left_on:
+        if how != "cross":
+            left = _prepare_df(left, left_on)
+            right = _prepare_df(right, right_on)
             left_index = True
-            left_on = None
-        if right.index.name and right.index.name == right_on:
             right_index = True
-            right_on = None
-        # Reset index if it is not used as a key to keep it in the DataFrame
-        if not left_index and left.index.name:
-            left = left.reset_index()
-        if not right_index and right.index.name:
-            right = right.reset_index()
         df = left.merge(
             right,
             how=how,
-            left_on=left_on,
-            right_on=right_on,
             left_index=left_index,
             right_index=right_index,
             suffixes=("", suffix),
         )
-        if index_cols:
-            return df.set_index(index_cols)
-        else:
-            return df
+        if how != "cross":
+            df.reset_index(inplace=True)
+        if index_cols is not None:
+            df.set_index(index_cols, inplace=True)
+        return df
 
     def _df_lt(
         self,

diff --git a/pyproject.toml b/pyproject.toml
@@ -29,6 +29,7 @@ polars = [
 docs = [
   "perfplot", #readme_script
   "seaborn", #readme_script
+  "matplotlib", #readme_script
   "mkdocs-material",
   "mkdocs-jupyter",
   "mkdocs-git-revision-date-localized-plugin",