better distance heuristic

This commit is contained in:
Jana Dönszelmann 2025-09-11 22:58:52 -07:00
parent 20a6d2faeb
commit 2bf743a272
No known key found for this signature in database
3 changed files with 179 additions and 91 deletions

View file

@ -2,15 +2,18 @@ use std::fmt::Display;
use simple_mcts::Game; use simple_mcts::Game;
use crate::pathfind::GoalDistanceMap;
#[derive(Clone, Copy, PartialEq)] #[derive(Clone, Copy, PartialEq)]
pub struct PlayerState { pub struct PlayerState {
xy: u8, xy: u8,
walls_left: u8, pub walls_left: u8,
} }
impl PlayerState { impl PlayerState {
pub const P1_START: Self = Self::new(4, 0, 9); pub const INITIAL_WALLS: u8 = 10;
pub const P2_START: Self = Self::new(4, 8, 9); pub const P1_START: Self = Self::new(4, 0, Self::INITIAL_WALLS);
pub const P2_START: Self = Self::new(4, 8, Self::INITIAL_WALLS);
pub const fn new(x: u8, y: u8, walls_left: u8) -> Self { pub const fn new(x: u8, y: u8, walls_left: u8) -> Self {
let mut res = Self { xy: 0, walls_left }; let mut res = Self { xy: 0, walls_left };
@ -54,24 +57,24 @@ pub struct WallState {
impl WallState { impl WallState {
#[inline] #[inline]
pub fn block_cleaned_hori(&mut self, byte_idx: u8, bit: u8) { fn block_cleaned_hori(&mut self, byte_idx: u8, bit: u8) {
self.horizontals[byte_idx as usize] |= 1 << bit; self.horizontals[byte_idx as usize] |= 1 << bit;
} }
#[inline] #[inline]
pub fn block_cleaned_verti(&mut self, byte_idx: u8, bit: u8) { fn block_cleaned_verti(&mut self, byte_idx: u8, bit: u8) {
self.verticals[byte_idx as usize] |= 1 << bit; self.verticals[byte_idx as usize] |= 1 << bit;
} }
#[inline] #[inline]
pub fn can_walk_between_cleaned_hori(&self, byte_idx: u8, bit: u8) -> bool { fn can_walk_between_cleaned_hori(&self, byte_idx: u8, bit: u8) -> bool {
(self.horizontals[byte_idx as usize] >> bit) & 1 != 0 (self.horizontals[byte_idx as usize] >> bit) & 1 != 0
} }
#[inline] #[inline]
pub fn can_walk_between_cleaned_verti(&self, byte_idx: u8, bit: u8) -> bool { fn can_walk_between_cleaned_verti(&self, byte_idx: u8, bit: u8) -> bool {
(self.verticals[byte_idx as usize] >> bit) & 1 != 0 (self.verticals[byte_idx as usize] >> bit) & 1 != 0
} }
pub fn block(&mut self, from_x: u8, from_y: u8, to_x: u8, to_y: u8) { fn block(&mut self, from_x: u8, from_y: u8, to_x: u8, to_y: u8) {
match (from_x.wrapping_sub(to_x), from_y.wrapping_sub(to_y)) { match (from_x.wrapping_sub(to_x), from_y.wrapping_sub(to_y)) {
(1, 0) => self.block_cleaned_verti(to_y, to_x), (1, 0) => self.block_cleaned_verti(to_y, to_x),
(0xff, 0) => self.block_cleaned_verti(from_y, from_x), (0xff, 0) => self.block_cleaned_verti(from_y, from_x),
@ -92,6 +95,24 @@ impl WallState {
_ => unreachable!(), _ => unreachable!(),
} }
} }
/// Reports whether both passages a two-cell wall anchored at `(x, y)` would
/// cover are still open.
///
/// A vertical wall covers the passages between columns `x` and `x + 1` in
/// rows `y` and `y + 1`; a horizontal wall covers the passages between rows
/// `y` and `y + 1` in columns `x` and `x + 1`.
pub fn can_place(&self, x: u8, y: u8, vertical: bool) -> bool {
    if vertical {
        let right = x + 1;
        if !self.can_walk_between(x, y, right, y) {
            return false;
        }
        let below = y + 1;
        self.can_walk_between(x, below, right, below)
    } else {
        let below = y + 1;
        if !self.can_walk_between(x, y, x, below) {
            return false;
        }
        let right = x + 1;
        self.can_walk_between(right, y, right, below)
    }
}
/// Places a two-cell wall anchored at `(x, y)` by blocking both passages it
/// covers.
///
/// A vertical wall blocks the passages between columns `x` and `x + 1` in
/// rows `y` and `y + 1`; a horizontal wall blocks the passages between rows
/// `y` and `y + 1` in columns `x` and `x + 1`. No legality check is done
/// here.
pub fn place(&mut self, x: u8, y: u8, vertical: bool) {
    if vertical {
        let right = x + 1;
        self.block(x, y, right, y);
        let below = y + 1;
        self.block(x, below, right, below);
    } else {
        let below = y + 1;
        self.block(x, y, x, below);
        let right = x + 1;
        self.block(right, y, right, below);
    }
}
} }
impl Default for WallState { impl Default for WallState {
@ -103,30 +124,55 @@ impl Default for WallState {
} }
} }
/// Identifies one of the two players.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PlayerIdentifier {
    /// The player whose goal row is `y == 8`.
    P1,
    /// The player whose goal row is `y == 0`.
    P2,
}

impl PlayerIdentifier {
    /// Flips this identifier in place so that it names the other player.
    pub fn swap(&mut self) {
        *self = match *self {
            Self::P1 => Self::P2,
            Self::P2 => Self::P1,
        };
    }

    /// Returns the row (`y` coordinate) this player has to reach.
    pub const fn y_goal(&self) -> u8 {
        match *self {
            Self::P1 => 8,
            Self::P2 => 0,
        }
    }
}
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
pub struct GameState { pub struct GameState {
pub p1: PlayerState, pub p1: PlayerState,
pub p2: PlayerState, pub p2: PlayerState,
pub walls: WallState, pub walls: WallState,
pub whose_turn: bool, pub current_player: PlayerIdentifier,
} }
impl GameState { impl GameState {
pub fn current_player(&self) -> &PlayerState { pub fn current_player_state(&self) -> &PlayerState {
if self.whose_turn { &self.p1 } else { &self.p2 } match self.current_player {
PlayerIdentifier::P1 => &self.p1,
PlayerIdentifier::P2 => &self.p2,
}
} }
pub fn current_player_mut(&mut self) -> &mut PlayerState { pub fn current_player_state_mut(&mut self) -> &mut PlayerState {
if self.whose_turn { match self.current_player {
&mut self.p1 PlayerIdentifier::P1 => &mut self.p1,
} else { PlayerIdentifier::P2 => &mut self.p2,
&mut self.p2
} }
} }
pub fn mcts_result(&self) -> Option<f64> { pub fn mcts_result(&self) -> Option<f64> {
let p1_won = self.p1.y() == 8; let p1_won = self.p1.y() == PlayerIdentifier::P1.y_goal();
let p2_won = self.p2.y() == 0; let p2_won = self.p2.y() == PlayerIdentifier::P2.y_goal();
let outcome_for_p1 = match (p1_won, p2_won) { let outcome_for_p1 = match (p1_won, p2_won) {
(false, false) => return None, (false, false) => return None,
@ -135,12 +181,9 @@ impl GameState {
(true, true) => 0.0, (true, true) => 0.0,
}; };
Some(if self.whose_turn { Some(match self.current_player {
//p1 wants to win PlayerIdentifier::P1 => outcome_for_p1,
outcome_for_p1 PlayerIdentifier::P2 => -1.0 * outcome_for_p1,
} else {
//p2 wants to win
-1.0 * outcome_for_p1
}) })
} }
} }
@ -151,13 +194,15 @@ impl Default for GameState {
p1: PlayerState::P1_START, p1: PlayerState::P1_START,
p2: PlayerState::P2_START, p2: PlayerState::P2_START,
walls: Default::default(), walls: Default::default(),
whose_turn: true, current_player: PlayerIdentifier::P1,
} }
} }
} }
impl Display for GameState { impl Display for GameState {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let dm = GoalDistanceMap::new(&self.walls, self.current_player);
writeln!( writeln!(
f, f,
"P1: {}, P2: {}\n", "P1: {}, P2: {}\n",
@ -191,12 +236,12 @@ impl Display for GameState {
}; };
write!(f, "{wall}")?; write!(f, "{wall}")?;
} }
let player = if self.p1.x() == x && 8 - self.p1.y() == y { let player = if self.p1.x() == x && self.p1.y() == y {
"P1" "\x1b[1mP1\x1b[0m".to_string()
} else if self.p2.x() == x && 8 - self.p2.y() == y { } else if self.p2.x() == x && self.p2.y() == y {
"P2" "\x1b[1mP2\x1b[0m".to_string()
} else { } else {
" " format!("{:^2}", dm.at(x, y))
}; };
write!(f, "{player}")?; write!(f, "{player}")?;
} }
@ -209,7 +254,7 @@ impl Display for GameState {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use crate::gamestate::{GameState, PlayerState, WallState}; use crate::gamestate::WallState;
#[test] #[test]
fn test_blocking() { fn test_blocking() {
@ -233,14 +278,5 @@ mod tests {
assert!(w.can_walk_between(7, 0, 8, 0)); assert!(w.can_walk_between(7, 0, 8, 0));
w.block(7, 0, 8, 0); w.block(7, 0, 8, 0);
assert!(!w.can_walk_between(7, 0, 8, 0)); assert!(!w.can_walk_between(7, 0, 8, 0));
println!(
"{}",
GameState {
p1: PlayerState::P1_START,
p2: PlayerState::P2_START,
walls: w,
}
);
} }
} }

View file

@ -1,4 +1,3 @@
use std::error::Error;
use std::thread; use std::thread;
use std::time::Duration; use std::time::Duration;
@ -8,8 +7,10 @@ use simple_mcts::Mcts;
use simple_mcts::MctsError; use simple_mcts::MctsError;
use crate::gamestate::GameState; use crate::gamestate::GameState;
use crate::gamestate::PlayerIdentifier;
mod gamestate; mod gamestate;
mod pathfind;
struct Quoridor { struct Quoridor {
state: GameState, state: GameState,
@ -38,23 +39,26 @@ impl Game<NUM_NEXT_STATES> for Quoridor {
fn get_actions(&self) -> [bool; NUM_NEXT_STATES] { fn get_actions(&self) -> [bool; NUM_NEXT_STATES] {
let mut res = [false; NUM_NEXT_STATES]; let mut res = [false; NUM_NEXT_STATES];
if self.state.current_player_state().walls_left > 0 {
for x in 0..8 { for x in 0..8 {
for y in 0..8 { for y in 0..8 {
if self.state.walls.can_walk_between(x, y, x + 1, y) if self.state.walls.can_walk_between(x, y, x + 1, y)
&& self.state.walls.can_walk_between(x, y + 1, x + 1, y + 1) && self.state.walls.can_walk_between(x, y + 1, x + 1, y + 1)
{ {
res[x as usize * 8 + y as usize] = true; res[x as usize * 8 + y as usize] = self.state.walls.can_place(x, y, false);
} }
if self.state.walls.can_walk_between(x, y, x, y + 1) if self.state.walls.can_walk_between(x, y, x, y + 1)
&& self.state.walls.can_walk_between(x + 1, y, x + 1, y + 1) && self.state.walls.can_walk_between(x + 1, y, x + 1, y + 1)
{ {
res[x as usize * 8 + y as usize + 64] = true; res[x as usize * 8 + y as usize + 64] =
self.state.walls.can_place(x, y, true);
}
} }
} }
} }
let p = self.state.current_player(); let p = self.state.current_player_state();
let x = p.x(); let x = p.x();
let y = p.y(); let y = p.y();
res[128] = if x == 0 { res[128] = if x == 0 {
@ -78,8 +82,6 @@ impl Game<NUM_NEXT_STATES> for Quoridor {
self.state.walls.can_walk_between(x, y, x, y + 1) self.state.walls.can_walk_between(x, y, x, y + 1)
}; };
// TODO: detect jumps for the 4 directions and also blocked jumps for the 2 sides of the other pawn // TODO: detect jumps for the 4 directions and also blocked jumps for the 2 sides of the other pawn
// dbg!(&res[128..132]);
res res
} }
@ -88,56 +90,40 @@ impl Game<NUM_NEXT_STATES> for Quoridor {
} }
fn play(&mut self, action: usize) { fn play(&mut self, action: usize) {
// Only actions 0..128 place a wall; 128 and above are pawn moves and must
// not consume one. With `<=`, moving left (action 128) also decremented the
// `u8` counter, which underflows once the player has no walls left.
if action < 128 {
    self.state.current_player_state_mut().walls_left -= 1;
}
let mut set_block = |i: usize, vertical| { let mut set_block = |i: usize, vertical| {
let x = (i / 8) as u8; let x = (i / 8) as u8;
let y = (i % 8) as u8; let y = (i % 8) as u8;
self.state.walls.place(x, y, vertical);
if vertical {
debug_assert!(self.state.walls.can_walk_between(x, y, x + 1, y));
debug_assert!(self.state.walls.can_walk_between(x, y + 1, x + 1, y + 1));
self.state.walls.block(x, y, x + 1, y);
self.state.walls.block(x, y + 1, x + 1, y + 1);
debug_assert!(!self.state.walls.can_walk_between(x, y, x + 1, y));
debug_assert!(!self.state.walls.can_walk_between(x, y + 1, x + 1, y + 1));
} else {
debug_assert!(self.state.walls.can_walk_between(x, y, x, y + 1));
debug_assert!(self.state.walls.can_walk_between(x + 1, y, x + 1, y + 1));
self.state.walls.block(x, y, x, y + 1);
self.state.walls.block(x + 1, y, x + 1, y + 1);
debug_assert!(!self.state.walls.can_walk_between(x, y, x, y + 1));
debug_assert!(!self.state.walls.can_walk_between(x + 1, y, x + 1, y + 1));
}
}; };
match action { match action {
i @ 0..64 => set_block(i, true), i @ 0..64 => set_block(i, true),
i @ 64..128 => set_block(i - 64, false), i @ 64..128 => set_block(i - 64, false),
128 => { 128 => {
let x = self.state.current_player().x(); let x = self.state.current_player_state().x();
self.state.current_player_mut().set_x(x - 1); self.state.current_player_state_mut().set_x(x - 1);
} }
129 => { 129 => {
let y = self.state.current_player().y(); let y = self.state.current_player_state().y();
self.state.current_player_mut().set_y(y - 1); self.state.current_player_state_mut().set_y(y - 1);
} }
130 => { 130 => {
let x = self.state.current_player().x(); let x = self.state.current_player_state().x();
self.state.current_player_mut().set_x(x + 1); self.state.current_player_state_mut().set_x(x + 1);
} }
131 => { 131 => {
let y = self.state.current_player().y(); let y = self.state.current_player_state().y();
self.state.current_player_mut().set_x(y + 1); self.state.current_player_state_mut().set_y(y + 1);
} }
132 => todo!(), 132 => todo!(),
133 => todo!(), 133 => todo!(),
_ => unreachable!(), _ => unreachable!(),
} }
self.state.whose_turn = !self.state.whose_turn; self.state.current_player.swap();
} }
fn get_state(&self) -> Self::State { fn get_state(&self) -> Self::State {
@ -153,9 +139,8 @@ impl Game<NUM_NEXT_STATES> for Quoridor {
} }
} }
struct QuoridorEvaluator; struct ResultEvaluator;
impl GameEvaluator<Quoridor, NUM_NEXT_STATES> for ResultEvaluator {
impl GameEvaluator<Quoridor, NUM_NEXT_STATES> for QuoridorEvaluator {
fn evaluate(&self, state: GameState) -> (f64, [f64; NUM_NEXT_STATES]) { fn evaluate(&self, state: GameState) -> (f64, [f64; NUM_NEXT_STATES]) {
( (
state.mcts_result().unwrap_or(0.0), state.mcts_result().unwrap_or(0.0),
@ -164,15 +149,31 @@ impl GameEvaluator<Quoridor, NUM_NEXT_STATES> for QuoridorEvaluator {
} }
} }
/// Evaluator that scores a state by how many rows the player to move has
/// advanced towards their goal row, paired with a uniform policy.
struct ProgressEvaluator;

impl GameEvaluator<Quoridor, NUM_NEXT_STATES> for ProgressEvaluator {
    /// Returns `(value, policy)`: the value is `(8 - rows left to the goal) / 8`
    /// for the current player, and every policy entry is `1 / NUM_NEXT_STATES`.
    fn evaluate(&self, state: GameState) -> (f64, [f64; NUM_NEXT_STATES]) {
        const UNIFORM_PRIOR: f64 = 1.0 / NUM_NEXT_STATES as f64;

        let goal_row = state.current_player.y_goal();
        let current_row = state.current_player_state().y();
        let rows_remaining = goal_row.abs_diff(current_row);
        let progress_across = 8 - rows_remaining;

        (
            progress_across as f64 / 8.0,
            [UNIFORM_PRIOR; NUM_NEXT_STATES],
        )
    }
}
fn main() -> Result<(), MctsError> { fn main() -> Result<(), MctsError> {
let g = Quoridor::default(); let mut g = Quoridor::default();
g.state.walls.place(4, 4, false);
let mut mcts: Mcts<Quoridor, _> = Mcts::<Quoridor, _>::new(); let mut mcts: Mcts<Quoridor, _> = Mcts::<Quoridor, _>::new();
let evaluator = QuoridorEvaluator; let evaluator = ProgressEvaluator;
for _ in 0..100 { for _ in 0..100 {
// Perform 100 MCTS iterations // Perform 100 MCTS iterations
for _ in 0..1000 { for _ in 0..10_000 {
mcts.iterate(&evaluator)?; mcts.iterate(&evaluator)?;
} }
@ -187,11 +188,18 @@ fn main() -> Result<(), MctsError> {
.max_by(|&(_, &a), &(_, &b)| a.partial_cmp(&b).unwrap()) .max_by(|&(_, &a), &(_, &b)| a.partial_cmp(&b).unwrap())
.map(|(index, _)| index) .map(|(index, _)| index)
.unwrap_or(0); // Default to first action if policy is empty .unwrap_or(0); // Default to first action if policy is empty
// let mut x = policy.iter().enumerate().collect::<Vec<_>>();
// x.sort_by(|&(_, &a), &(_, &b)| a.total_cmp(&b));
// let top_5 = &x[x.len() - 5..];
// println!("{top_5:?}");
// let best_action_index = x.last().map(|(index, _)| *index).unwrap_or(0);
println!("best action: {best_action_index}"); println!("best action: {best_action_index}");
mcts.play(best_action_index)?; mcts.play(best_action_index)?;
println!("{}", mcts.get_game().state); println!("{}", mcts.get_game().state);
thread::sleep(Duration::from_millis(50)); // thread::sleep(Duration::from_millis(500));
} }
// Continue with the next game state // Continue with the next game state

44
src/pathfind.rs Normal file
View file

@ -0,0 +1,44 @@
use std::collections::VecDeque;
use crate::gamestate::{PlayerIdentifier, WallState};
/// Per-cell walking distance to a player's goal row on the 9x9 board.
pub struct GoalDistanceMap {
    // Indexed as `[y][x]`. Cells the search in `new` never reaches keep the
    // initial value `u8::MAX`.
    distances: [[u8; 9]; 9],
}
impl GoalDistanceMap {
    /// Builds the distance map for `for_player` with a breadth-first search
    /// that starts from every cell of that player's goal row and only crosses
    /// passages `w` reports as walkable.
    ///
    /// Cells that cannot be reached keep the value `u8::MAX`.
    pub fn new(w: &WallState, for_player: PlayerIdentifier) -> Self {
        let goal_row = for_player.y_goal();
        let mut distances = [[u8::MAX; 9]; 9];

        // Seed the queue with the whole goal row at distance zero.
        let mut frontier: VecDeque<((u8, u8), u8)> = VecDeque::with_capacity(9 * 9);
        frontier.extend((0u8..9).map(|x| ((x, goal_row), 0)));

        while let Some(((x, y), steps)) = frontier.pop_front() {
            let slot = &mut distances[y as usize][x as usize];
            // A cell may be queued more than once; the first visit is the shortest.
            if *slot != u8::MAX {
                continue;
            }
            *slot = steps;

            // In-bounds neighbours, in the order left, right, up, down.
            let neighbours = [
                (x > 0).then(|| (x - 1, y)),
                (x < 8).then(|| (x + 1, y)),
                (y > 0).then(|| (x, y - 1)),
                (y < 8).then(|| (x, y + 1)),
            ];
            for &(nx, ny) in neighbours.iter().flatten() {
                if w.can_walk_between(x, y, nx, ny) {
                    frontier.push_back(((nx, ny), steps + 1));
                }
            }
        }

        Self { distances }
    }

    /// Returns the stored distance for the cell at column `x`, row `y`.
    ///
    /// Panics if either coordinate is 9 or greater.
    pub fn at(&self, x: u8, y: u8) -> u8 {
        let row = &self.distances[y as usize];
        row[x as usize]
    }
}