|
""" |
|
Filename: MetaGPT/examples/werewolf_game/evals/eval.py |
|
Created Date: Oct 18, 2023 |
|
Updated Date: Oct 24, 2023 |
|
Author: [Aria](https://github.com/ariafyy) |
|
Info: eval the Voting Accuracy Rate of non_werewolves and Vote Difficulity |
|
""" |
|
|
|
import glob |
|
import os |
|
import re |
|
from pathlib import Path |
|
|
|
import numpy as np |
|
import pandas as pd |
|
from tqdm import tqdm |
|
from utils import Utils |
|
|
|
from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT |
|
from metagpt.environment.werewolf.const import RoleType |
|
|
|
|
|
class Vote:
    """Vote Evaluation.

    Extracts the voting rounds from werewolf game logs and measures how often
    the non-werewolf voters voted against a werewolf, per round, per game file
    and (for the first round) per sub folder.
    """

    # Header line that opens a voting round. The unescaped dots act as regex
    # wildcards; the pattern is kept unchanged so the same logs keep matching.
    _ROUND_HEADER = re.compile(r"""\[([^\]]+)\]. Say ONLY: I vote to eliminate ...""")
    # One ballot line: "<voter>(<role>): <number> | I vote to eliminate <target>".
    _BALLOT = re.compile(r"(\w+)\(([^\)]+)\): \d+ \| I vote to eliminate (\w+)")

    def __init__(self):
        """Set the output folder and the evaluated sub folders.

        The output folder is ``DEFAULT_WORKSPACE_ROOT / "outputs"`` and is
        created if it does not exist yet.
        """
        self.OUT_PATH = DEFAULT_WORKSPACE_ROOT / "outputs"
        os.makedirs(self.OUT_PATH, exist_ok=True)
        self.SUB_FOLDER_LIST = ["01-10", "11-20", "21-30"]

    def _get_log_fileslist(self, IN_PATH) -> list[str]:
        """Return the paths of every ``*.txt`` file in the known sub folders.

        :param IN_PATH: parent folder of the folders in ``SUB_FOLDER_LIST``;
            must support the ``/`` path operator.
        :return: matching file paths as strings, in glob order.
        """
        files_list = []
        for sub_folder in self.SUB_FOLDER_LIST:
            files_list.extend(glob.glob(str(IN_PATH / sub_folder / "*.txt")))
        return files_list

    def extract_votes_from_logs(self, files_list: list):
        """Write the vote records of each game log into ``OUT_PATH``.

        Every log is handed to ``Utils().pick_vote_log`` together with an
        output path named ``"# <sub folder>_<log stem>.txt"``.

        :param files_list: paths of the raw game logs.
        :return: the result of ``Utils().get_file_list(self.OUT_PATH)``.
            NOTE(review): this lists the output folder rather than the files
            written by this call, so it presumably also returns files left
            there by earlier runs - confirm against ``Utils.get_file_list``.
        """
        for in_logfile in tqdm(files_list):
            log_path = Path(in_logfile)
            out_txtfile = self.OUT_PATH / f"# {log_path.parent.stem}_{log_path.stem}.txt"
            Utils().pick_vote_log(in_logfile, out_txtfile)
        return Utils().get_file_list(self.OUT_PATH)

    @staticmethod
    def parse_vote_text2chunks(text: str):
        """Split the vote log of one game into chunks, one per voting round.

        The text is cut at every round header, i.e. a line such as::

            ['Player1', 'Player2', 'Player3', 'Player5', 'Player6']. Say ONLY: I vote to eliminate ...

        The header itself is dropped; a chunk holds the stripped text that
        follows it, for example::

            Player1(Witch): 49 | I vote to eliminate Player5
            Player2(Villager): 49 | I vote to eliminate Player5
            Player5(Werewolf): 49 | I vote to eliminate Player6

        :param text: full content of one extracted vote file.
        :return: dict mapping ``"vote_<i>"`` to chunk text. ``vote_0`` is the
            text in front of the first header (possibly ``""``), ``vote_1`` is
            the first round, and so on. A trailing chunk is only added when it
            is not empty.
        """
        chunks = {}
        last_end = 0
        for match in Vote._ROUND_HEADER.finditer(text):
            # One entry is added per header, so len(chunks) is the next chunk id.
            chunks[f"vote_{len(chunks)}"] = text[last_end : match.start()].strip()
            last_end = match.end()
        final_chunk = text[last_end:].strip()
        if final_chunk:
            chunks[f"vote_{len(chunks)}"] = final_chunk
        return chunks

    def _vote_rate_players(self, text: str):
        """Compute how often the non-werewolf voters voted for a werewolf.

        :example:

        input:
            Player1(Witch): 49 | I vote to eliminate Player5
            Player2(Villager): 49 | I vote to eliminate Player5
            Player3(Villager): 49 | I vote to eliminate Player5
            Player5(Werewolf): 49 | I vote to eliminate Player6
            Player6(Seer): 49 | I vote to eliminate Player5

        output:
            werewolves: ['Player5']
            non_werewolves: ['Player1', 'Player2', 'Player3', 'Player6']
            good_vote_rate: 1.0 (all four non-werewolves voted for Player5)

        :param text: vote lines of a single round.
        :return: dict with ``good_vote_rate`` (correct votes divided by the
            number of non-werewolf voters, rounded to 2 decimals),
            ``werewolves`` and ``non_werewolves`` (voter names in log order).
            The rate is ``0.0`` when the text holds no non-werewolf ballot,
            instead of failing with a division by zero.
        """
        ballots = self._BALLOT.findall(text)  # [(voter, role, target), ...]
        if not ballots:
            return {"good_vote_rate": 0.0, "werewolves": [], "non_werewolves": []}

        wolf_role = RoleType.WEREWOLF.value
        werewolves = [voter for voter, role, _ in ballots if role == wolf_role]
        non_werewolves = [voter for voter, role, _ in ballots if role != wolf_role]

        # A vote is correct when a non-werewolf targets a werewolf that voted in this round.
        wolf_names = set(werewolves)
        correct_votes = sum(1 for _, role, target in ballots if role != wolf_role and target in wolf_names)

        # Guard the round in which only werewolves cast a ballot.
        rate = correct_votes / len(non_werewolves) if non_werewolves else 0.0
        return {
            "good_vote_rate": round(rate, 2),
            "werewolves": werewolves,
            "non_werewolves": non_werewolves,
        }

    def get_goodteam_vote_rate(self, text: str) -> float:
        """Return only the ``good_vote_rate`` of one round (see ``_vote_rate_players``)."""
        return self._vote_rate_players(text)["good_vote_rate"]

    def get_werewolves(self, text: str) -> list:
        """Return the names of the werewolves that voted in one round."""
        return self._vote_rate_players(text)["werewolves"]

    def get_non_werewolves(self, text: str) -> list:
        """Return the names of the non-werewolves that voted in one round."""
        return self._vote_rate_players(text)["non_werewolves"]

    def get_votewolf_difficulty(self, werewolves: list, non_werewolves: list) -> str:
        """Format the share of werewolves among the voters of one round.

        :return: ``"_<number of werewolves> / <number of all voters>"``.
        """
        num_living_wolfs = len(werewolves)
        num_living_players = num_living_wolfs + len(non_werewolves)
        return f"_{num_living_wolfs} / {num_living_players}"

    def get_result_df(self, out_txtfile: str) -> pd.DataFrame:
        """Build one row per non-empty voting chunk of an extracted vote file.

        Columns:
            folder: value of ``Utils().filename_to_foldername(out_txtfile)``
            file: name of the vote file, each file represents one game
            vote_round: chunk key, e.g. ``vote_1`` is the first vote of the game
            good_vote_rate: correct votes / number of non-werewolf voters
            total_votes: number of chunks in the file minus one
            votewolf_difficulty: ``"_<werewolves> / <voters>"`` of the round

        :param out_txtfile: path of a file produced by ``extract_votes_from_logs``.
        :return: the rows as a DataFrame; empty when the file has no non-empty chunk.
        """
        with open(out_txtfile, "r") as out_file:
            text = out_file.read()
        chunks = self.parse_vote_text2chunks(text)
        rounds = {key: chunk for key, chunk in chunks.items() if chunk != ""}
        if not rounds:
            return pd.DataFrame([])

        # These values are the same for every round of the file, so compute them once.
        total_votes = len(chunks) - 1
        folder = Utils().filename_to_foldername(out_txtfile)
        file_name = Path(out_txtfile).stem + ".txt"

        res = []
        for vote_round, chunk in rounds.items():
            stats = self._vote_rate_players(chunk)  # parse each round only once
            res.append(
                {
                    "folder": folder,
                    "file": file_name,
                    "vote_round": vote_round,
                    "good_vote_rate": stats["good_vote_rate"],
                    "total_votes": total_votes,
                    "votewolf_difficulty": self.get_votewolf_difficulty(
                        stats["werewolves"], stats["non_werewolves"]
                    ),
                }
            )
        return pd.DataFrame(res)

    def calc_avg_rate(self, IN_PATH) -> pd.DataFrame:
        """Evaluate all logs below ``IN_PATH`` and add the aggregated rates.

        Added columns:
            avg_rate: mean ``good_vote_rate`` over all rows of the same file
            vote1_rate: mean ``good_vote_rate`` of the ``vote_1`` rows of the
                same folder; only kept on ``vote_1`` rows, NaN elsewhere

        All three rate columns are passed through ``Utils().float_to_percent``
        and the rows are sorted by file.

        :param IN_PATH: parent folder of the folders in ``SUB_FOLDER_LIST``.
        :raises ValueError: when no vote rows could be extracted at all.
        """
        infiles_list = self._get_log_fileslist(IN_PATH)
        votefiles_list = self.extract_votes_from_logs(infiles_list)
        frames = [self._load_df_from_file(file) for file in votefiles_list]
        # Files without any vote round yield empty frames; they add no rows.
        frames = [frame for frame in frames if not frame.empty]
        if not frames:
            raise ValueError(f"No vote records found for {IN_PATH}; nothing to evaluate")
        combined_df = pd.concat(frames, ignore_index=True)

        mean_rates = self._calculate_mean_rates(combined_df)
        combined_df["avg_rate"] = combined_df["file"].map(mean_rates)

        vote1_rates = self._calc_vote1_rates(combined_df)
        combined_df["vote1_rate"] = combined_df["folder"].map(vote1_rates.set_index("folder")["good_vote_rate"])
        # The first-round rate is only shown on the first-round rows.
        combined_df.loc[combined_df["vote_round"] != "vote_1", "vote1_rate"] = np.nan

        for column in ("vote1_rate", "good_vote_rate", "avg_rate"):
            combined_df[column] = combined_df[column].apply(self._format_rates)
        combined_df.sort_values(["file"], ascending=True, inplace=True)
        return combined_df

    def _calc_vote1_rates(self, df):
        """Return the mean first-round ``good_vote_rate`` per folder (columns ``folder``, ``good_vote_rate``)."""
        df_vote1 = df[df["vote_round"] == "vote_1"]
        return df_vote1.groupby("folder")["good_vote_rate"].mean().reset_index()

    def _load_df_from_file(self, file):
        """Load the per-round result rows of one extracted vote file."""
        return self.get_result_df(file)

    def _calculate_mean_rates(self, df):
        """Return the mean ``good_vote_rate`` per file as a Series indexed by file."""
        return df.groupby("file")["good_vote_rate"].mean()

    def _format_rates(self, s):
        """Format one rate value with ``Utils().float_to_percent``."""
        return Utils().float_to_percent(s)

    def get_eval_csv(self, IN_PATH, EVAL_RESULT):
        """Evaluate the logs and write the result table as CSV.

        IN_PATH : parent folder of ["01-10", "11-20", "21-30"]
        EVAL_RESULT : output csv file path
        """
        combined_df = self.calc_avg_rate(IN_PATH)
        combined_df.to_csv(EVAL_RESULT, index=False)
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: evaluate the game logs that ship with the werewolf
    # example and store the resulting table in the workspace output folder.
    IN_PATH = METAGPT_ROOT / "examples/werewolf_game/evals"
    EVAL_RESULT = DEFAULT_WORKSPACE_ROOT / "outputs" / "goodteam_vote_rate.csv"
    evaluator = Vote()
    evaluator.get_eval_csv(IN_PATH=IN_PATH, EVAL_RESULT=EVAL_RESULT)
|
|