better distance heuristic
This commit is contained in:
parent
20a6d2faeb
commit
2bf743a272
3 changed files with 179 additions and 91 deletions
116
src/gamestate.rs
116
src/gamestate.rs
|
|
@ -2,15 +2,18 @@ use std::fmt::Display;
|
|||
|
||||
use simple_mcts::Game;
|
||||
|
||||
use crate::pathfind::GoalDistanceMap;
|
||||
|
||||
#[derive(Clone, Copy, PartialEq)]
|
||||
pub struct PlayerState {
|
||||
xy: u8,
|
||||
walls_left: u8,
|
||||
pub walls_left: u8,
|
||||
}
|
||||
|
||||
impl PlayerState {
|
||||
pub const P1_START: Self = Self::new(4, 0, 9);
|
||||
pub const P2_START: Self = Self::new(4, 8, 9);
|
||||
pub const INITIAL_WALLS: u8 = 10;
|
||||
pub const P1_START: Self = Self::new(4, 0, Self::INITIAL_WALLS);
|
||||
pub const P2_START: Self = Self::new(4, 8, Self::INITIAL_WALLS);
|
||||
|
||||
pub const fn new(x: u8, y: u8, walls_left: u8) -> Self {
|
||||
let mut res = Self { xy: 0, walls_left };
|
||||
|
|
@ -54,24 +57,24 @@ pub struct WallState {
|
|||
|
||||
impl WallState {
|
||||
#[inline]
|
||||
pub fn block_cleaned_hori(&mut self, byte_idx: u8, bit: u8) {
|
||||
fn block_cleaned_hori(&mut self, byte_idx: u8, bit: u8) {
|
||||
self.horizontals[byte_idx as usize] |= 1 << bit;
|
||||
}
|
||||
#[inline]
|
||||
pub fn block_cleaned_verti(&mut self, byte_idx: u8, bit: u8) {
|
||||
fn block_cleaned_verti(&mut self, byte_idx: u8, bit: u8) {
|
||||
self.verticals[byte_idx as usize] |= 1 << bit;
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn can_walk_between_cleaned_hori(&self, byte_idx: u8, bit: u8) -> bool {
|
||||
fn can_walk_between_cleaned_hori(&self, byte_idx: u8, bit: u8) -> bool {
|
||||
(self.horizontals[byte_idx as usize] >> bit) & 1 != 0
|
||||
}
|
||||
#[inline]
|
||||
pub fn can_walk_between_cleaned_verti(&self, byte_idx: u8, bit: u8) -> bool {
|
||||
fn can_walk_between_cleaned_verti(&self, byte_idx: u8, bit: u8) -> bool {
|
||||
(self.verticals[byte_idx as usize] >> bit) & 1 != 0
|
||||
}
|
||||
|
||||
pub fn block(&mut self, from_x: u8, from_y: u8, to_x: u8, to_y: u8) {
|
||||
fn block(&mut self, from_x: u8, from_y: u8, to_x: u8, to_y: u8) {
|
||||
match (from_x.wrapping_sub(to_x), from_y.wrapping_sub(to_y)) {
|
||||
(1, 0) => self.block_cleaned_verti(to_y, to_x),
|
||||
(0xff, 0) => self.block_cleaned_verti(from_y, from_x),
|
||||
|
|
@ -92,6 +95,24 @@ impl WallState {
|
|||
_ => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn can_place(&self, x: u8, y: u8, vertical: bool) -> bool {
|
||||
if vertical {
|
||||
self.can_walk_between(x, y, x + 1, y) && self.can_walk_between(x, y + 1, x + 1, y + 1)
|
||||
} else {
|
||||
self.can_walk_between(x, y, x, y + 1) && self.can_walk_between(x + 1, y, x + 1, y + 1)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn place(&mut self, x: u8, y: u8, vertical: bool) {
|
||||
if vertical {
|
||||
self.block(x, y, x + 1, y);
|
||||
self.block(x, y + 1, x + 1, y + 1);
|
||||
} else {
|
||||
self.block(x, y, x, y + 1);
|
||||
self.block(x + 1, y, x + 1, y + 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for WallState {
|
||||
|
|
@ -103,30 +124,55 @@ impl Default for WallState {
|
|||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum PlayerIdentifier {
|
||||
P1,
|
||||
P2,
|
||||
}
|
||||
|
||||
impl PlayerIdentifier {
|
||||
pub fn swap(&mut self) {
|
||||
use PlayerIdentifier::*;
|
||||
*self = match self {
|
||||
P1 => P2,
|
||||
P2 => P1,
|
||||
};
|
||||
}
|
||||
|
||||
pub const fn y_goal(&self) -> u8 {
|
||||
match self {
|
||||
PlayerIdentifier::P1 => 8,
|
||||
PlayerIdentifier::P2 => 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct GameState {
|
||||
pub p1: PlayerState,
|
||||
pub p2: PlayerState,
|
||||
pub walls: WallState,
|
||||
pub whose_turn: bool,
|
||||
pub current_player: PlayerIdentifier,
|
||||
}
|
||||
|
||||
impl GameState {
|
||||
pub fn current_player(&self) -> &PlayerState {
|
||||
if self.whose_turn { &self.p1 } else { &self.p2 }
|
||||
pub fn current_player_state(&self) -> &PlayerState {
|
||||
match self.current_player {
|
||||
PlayerIdentifier::P1 => &self.p1,
|
||||
PlayerIdentifier::P2 => &self.p2,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn current_player_mut(&mut self) -> &mut PlayerState {
|
||||
if self.whose_turn {
|
||||
&mut self.p1
|
||||
} else {
|
||||
&mut self.p2
|
||||
pub fn current_player_state_mut(&mut self) -> &mut PlayerState {
|
||||
match self.current_player {
|
||||
PlayerIdentifier::P1 => &mut self.p1,
|
||||
PlayerIdentifier::P2 => &mut self.p2,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn mcts_result(&self) -> Option<f64> {
|
||||
let p1_won = self.p1.y() == 8;
|
||||
let p2_won = self.p2.y() == 0;
|
||||
let p1_won = self.p1.y() == PlayerIdentifier::P1.y_goal();
|
||||
let p2_won = self.p2.y() == PlayerIdentifier::P2.y_goal();
|
||||
|
||||
let outcome_for_p1 = match (p1_won, p2_won) {
|
||||
(false, false) => return None,
|
||||
|
|
@ -135,12 +181,9 @@ impl GameState {
|
|||
(true, true) => 0.0,
|
||||
};
|
||||
|
||||
Some(if self.whose_turn {
|
||||
//p1 wants to win
|
||||
outcome_for_p1
|
||||
} else {
|
||||
//p2 wants to win
|
||||
-1.0 * outcome_for_p1
|
||||
Some(match self.current_player {
|
||||
PlayerIdentifier::P1 => outcome_for_p1,
|
||||
PlayerIdentifier::P2 => -1.0 * outcome_for_p1,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
@ -151,13 +194,15 @@ impl Default for GameState {
|
|||
p1: PlayerState::P1_START,
|
||||
p2: PlayerState::P2_START,
|
||||
walls: Default::default(),
|
||||
whose_turn: true,
|
||||
current_player: PlayerIdentifier::P1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for GameState {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let dm = GoalDistanceMap::new(&self.walls, self.current_player);
|
||||
|
||||
writeln!(
|
||||
f,
|
||||
"P1: {}, P2: {}\n",
|
||||
|
|
@ -191,12 +236,12 @@ impl Display for GameState {
|
|||
};
|
||||
write!(f, "{wall}")?;
|
||||
}
|
||||
let player = if self.p1.x() == x && 8 - self.p1.y() == y {
|
||||
"P1"
|
||||
} else if self.p2.x() == x && 8 - self.p2.y() == y {
|
||||
"P2"
|
||||
let player = if self.p1.x() == x && self.p1.y() == y {
|
||||
"\x1b[1mP1\x1b[0m".to_string()
|
||||
} else if self.p2.x() == x && self.p2.y() == y {
|
||||
"\x1b[1mP2\x1b[0m".to_string()
|
||||
} else {
|
||||
" "
|
||||
format!("{:^2}", dm.at(x, y))
|
||||
};
|
||||
write!(f, "{player}")?;
|
||||
}
|
||||
|
|
@ -209,7 +254,7 @@ impl Display for GameState {
|
|||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::gamestate::{GameState, PlayerState, WallState};
|
||||
use crate::gamestate::WallState;
|
||||
|
||||
#[test]
|
||||
fn test_blocking() {
|
||||
|
|
@ -233,14 +278,5 @@ mod tests {
|
|||
assert!(w.can_walk_between(7, 0, 8, 0));
|
||||
w.block(7, 0, 8, 0);
|
||||
assert!(!w.can_walk_between(7, 0, 8, 0));
|
||||
|
||||
println!(
|
||||
"{}",
|
||||
GameState {
|
||||
p1: PlayerState::P1_START,
|
||||
p2: PlayerState::P2_START,
|
||||
walls: w,
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
92
src/main.rs
92
src/main.rs
|
|
@ -1,4 +1,3 @@
|
|||
use std::error::Error;
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
|
|
@ -8,8 +7,10 @@ use simple_mcts::Mcts;
|
|||
use simple_mcts::MctsError;
|
||||
|
||||
use crate::gamestate::GameState;
|
||||
use crate::gamestate::PlayerIdentifier;
|
||||
|
||||
mod gamestate;
|
||||
mod pathfind;
|
||||
|
||||
struct Quoridor {
|
||||
state: GameState,
|
||||
|
|
@ -38,23 +39,26 @@ impl Game<NUM_NEXT_STATES> for Quoridor {
|
|||
fn get_actions(&self) -> [bool; NUM_NEXT_STATES] {
|
||||
let mut res = [false; NUM_NEXT_STATES];
|
||||
|
||||
if self.state.current_player_state().walls_left > 0 {
|
||||
for x in 0..8 {
|
||||
for y in 0..8 {
|
||||
if self.state.walls.can_walk_between(x, y, x + 1, y)
|
||||
&& self.state.walls.can_walk_between(x, y + 1, x + 1, y + 1)
|
||||
{
|
||||
res[x as usize * 8 + y as usize] = true;
|
||||
res[x as usize * 8 + y as usize] = self.state.walls.can_place(x, y, false);
|
||||
}
|
||||
|
||||
if self.state.walls.can_walk_between(x, y, x, y + 1)
|
||||
&& self.state.walls.can_walk_between(x + 1, y, x + 1, y + 1)
|
||||
{
|
||||
res[x as usize * 8 + y as usize + 64] = true;
|
||||
res[x as usize * 8 + y as usize + 64] =
|
||||
self.state.walls.can_place(x, y, true);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let p = self.state.current_player();
|
||||
let p = self.state.current_player_state();
|
||||
let x = p.x();
|
||||
let y = p.y();
|
||||
res[128] = if x == 0 {
|
||||
|
|
@ -78,8 +82,6 @@ impl Game<NUM_NEXT_STATES> for Quoridor {
|
|||
self.state.walls.can_walk_between(x, y, x, y + 1)
|
||||
};
|
||||
// TODO: detect jumps for the 4 directions and also blocked jumps for the 2 sides of the other pawn
|
||||
|
||||
// dbg!(&res[128..132]);
|
||||
res
|
||||
}
|
||||
|
||||
|
|
@ -88,56 +90,40 @@ impl Game<NUM_NEXT_STATES> for Quoridor {
|
|||
}
|
||||
|
||||
fn play(&mut self, action: usize) {
|
||||
if action <= 128 {
|
||||
self.state.current_player_state_mut().walls_left -= 1;
|
||||
}
|
||||
let mut set_block = |i: usize, vertical| {
|
||||
let x = (i / 8) as u8;
|
||||
let y = (i % 8) as u8;
|
||||
|
||||
if vertical {
|
||||
debug_assert!(self.state.walls.can_walk_between(x, y, x + 1, y));
|
||||
debug_assert!(self.state.walls.can_walk_between(x, y + 1, x + 1, y + 1));
|
||||
|
||||
self.state.walls.block(x, y, x + 1, y);
|
||||
self.state.walls.block(x, y + 1, x + 1, y + 1);
|
||||
|
||||
debug_assert!(!self.state.walls.can_walk_between(x, y, x + 1, y));
|
||||
debug_assert!(!self.state.walls.can_walk_between(x, y + 1, x + 1, y + 1));
|
||||
} else {
|
||||
debug_assert!(self.state.walls.can_walk_between(x, y, x, y + 1));
|
||||
debug_assert!(self.state.walls.can_walk_between(x + 1, y, x + 1, y + 1));
|
||||
|
||||
self.state.walls.block(x, y, x, y + 1);
|
||||
self.state.walls.block(x + 1, y, x + 1, y + 1);
|
||||
|
||||
debug_assert!(!self.state.walls.can_walk_between(x, y, x, y + 1));
|
||||
debug_assert!(!self.state.walls.can_walk_between(x + 1, y, x + 1, y + 1));
|
||||
}
|
||||
self.state.walls.place(x, y, vertical);
|
||||
};
|
||||
|
||||
match action {
|
||||
i @ 0..64 => set_block(i, true),
|
||||
i @ 64..128 => set_block(i - 64, false),
|
||||
128 => {
|
||||
let x = self.state.current_player().x();
|
||||
self.state.current_player_mut().set_x(x - 1);
|
||||
let x = self.state.current_player_state().x();
|
||||
self.state.current_player_state_mut().set_x(x - 1);
|
||||
}
|
||||
129 => {
|
||||
let y = self.state.current_player().y();
|
||||
self.state.current_player_mut().set_y(y - 1);
|
||||
let y = self.state.current_player_state().y();
|
||||
self.state.current_player_state_mut().set_y(y - 1);
|
||||
}
|
||||
130 => {
|
||||
let x = self.state.current_player().x();
|
||||
self.state.current_player_mut().set_x(x + 1);
|
||||
let x = self.state.current_player_state().x();
|
||||
self.state.current_player_state_mut().set_x(x + 1);
|
||||
}
|
||||
131 => {
|
||||
let y = self.state.current_player().y();
|
||||
self.state.current_player_mut().set_x(y + 1);
|
||||
let y = self.state.current_player_state().y();
|
||||
self.state.current_player_state_mut().set_y(y + 1);
|
||||
}
|
||||
132 => todo!(),
|
||||
133 => todo!(),
|
||||
_ => unreachable!(),
|
||||
}
|
||||
|
||||
self.state.whose_turn = !self.state.whose_turn;
|
||||
self.state.current_player.swap();
|
||||
}
|
||||
|
||||
fn get_state(&self) -> Self::State {
|
||||
|
|
@ -153,9 +139,8 @@ impl Game<NUM_NEXT_STATES> for Quoridor {
|
|||
}
|
||||
}
|
||||
|
||||
struct QuoridorEvaluator;
|
||||
|
||||
impl GameEvaluator<Quoridor, NUM_NEXT_STATES> for QuoridorEvaluator {
|
||||
struct ResultEvaluator;
|
||||
impl GameEvaluator<Quoridor, NUM_NEXT_STATES> for ResultEvaluator {
|
||||
fn evaluate(&self, state: GameState) -> (f64, [f64; NUM_NEXT_STATES]) {
|
||||
(
|
||||
state.mcts_result().unwrap_or(0.0),
|
||||
|
|
@ -164,15 +149,31 @@ impl GameEvaluator<Quoridor, NUM_NEXT_STATES> for QuoridorEvaluator {
|
|||
}
|
||||
}
|
||||
|
||||
struct ProgressEvaluator;
|
||||
impl GameEvaluator<Quoridor, NUM_NEXT_STATES> for ProgressEvaluator {
|
||||
fn evaluate(&self, state: GameState) -> (f64, [f64; NUM_NEXT_STATES]) {
|
||||
let progress_across = 8 - state
|
||||
.current_player
|
||||
.y_goal()
|
||||
.abs_diff(state.current_player_state().y());
|
||||
|
||||
(
|
||||
progress_across as f64 / 8.0,
|
||||
[const { 1.0 / NUM_NEXT_STATES as f64 }; NUM_NEXT_STATES],
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
fn main() -> Result<(), MctsError> {
|
||||
let g = Quoridor::default();
|
||||
let mut g = Quoridor::default();
|
||||
g.state.walls.place(4, 4, false);
|
||||
|
||||
let mut mcts: Mcts<Quoridor, _> = Mcts::<Quoridor, _>::new();
|
||||
let evaluator = QuoridorEvaluator;
|
||||
let evaluator = ProgressEvaluator;
|
||||
|
||||
for _ in 0..100 {
|
||||
// Perform 100 MCTS iterations
|
||||
for _ in 0..1000 {
|
||||
for _ in 0..10_000 {
|
||||
mcts.iterate(&evaluator)?;
|
||||
}
|
||||
|
||||
|
|
@ -187,11 +188,18 @@ fn main() -> Result<(), MctsError> {
|
|||
.max_by(|&(_, &a), &(_, &b)| a.partial_cmp(&b).unwrap())
|
||||
.map(|(index, _)| index)
|
||||
.unwrap_or(0); // Default to first action if policy is empty
|
||||
|
||||
// let mut x = policy.iter().enumerate().collect::<Vec<_>>();
|
||||
// x.sort_by(|&(_, &a), &(_, &b)| a.total_cmp(&b));
|
||||
// let top_5 = &x[x.len() - 5..];
|
||||
// println!("{top_5:?}");
|
||||
// let best_action_index = x.last().map(|(index, _)| *index).unwrap_or(0);
|
||||
|
||||
println!("best action: {best_action_index}");
|
||||
mcts.play(best_action_index)?;
|
||||
|
||||
println!("{}", mcts.get_game().state);
|
||||
thread::sleep(Duration::from_millis(50));
|
||||
// thread::sleep(Duration::from_millis(500));
|
||||
}
|
||||
|
||||
// Continue with the next game state
|
||||
|
|
|
|||
44
src/pathfind.rs
Normal file
44
src/pathfind.rs
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
use std::collections::VecDeque;
|
||||
|
||||
use crate::gamestate::{PlayerIdentifier, WallState};
|
||||
|
||||
pub struct GoalDistanceMap {
|
||||
distances: [[u8; 9]; 9],
|
||||
}
|
||||
|
||||
impl GoalDistanceMap {
|
||||
pub fn new(w: &WallState, for_player: PlayerIdentifier) -> Self {
|
||||
let mut todo = VecDeque::with_capacity(9 * 9);
|
||||
let mut res = [[u8::MAX; 9]; 9];
|
||||
|
||||
for i in 0u8..9 {
|
||||
todo.push_back(((i, for_player.y_goal()), 0));
|
||||
}
|
||||
|
||||
while let Some(((x, y), distance)) = todo.pop_front() {
|
||||
if res[y as usize][x as usize] != u8::MAX {
|
||||
continue;
|
||||
}
|
||||
res[y as usize][x as usize] = distance;
|
||||
|
||||
if x > 0 && w.can_walk_between(x, y, x - 1, y) {
|
||||
todo.push_back(((x - 1, y), distance + 1));
|
||||
}
|
||||
if x < 8 && w.can_walk_between(x, y, x + 1, y) {
|
||||
todo.push_back(((x + 1, y), distance + 1));
|
||||
}
|
||||
if y > 0 && w.can_walk_between(x, y, x, y - 1) {
|
||||
todo.push_back(((x, y - 1), distance + 1));
|
||||
}
|
||||
if y < 8 && w.can_walk_between(x, y, x, y + 1) {
|
||||
todo.push_back(((x, y + 1), distance + 1));
|
||||
}
|
||||
}
|
||||
|
||||
Self { distances: res }
|
||||
}
|
||||
|
||||
pub fn at(&self, x: u8, y: u8) -> u8 {
|
||||
self.distances[y as usize][x as usize]
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue