Spaces:
Sleeping
Sleeping
File size: 9,131 Bytes
fe5c39d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 |
"""
Filename: MetaGPT/examples/werewolf_game/evals/eval.py
Created Date: Oct 18, 2023
Updated Date: Oct 24, 2023
Author: [Aria](https://github.com/ariafyy)
Info: eval the Voting Accuracy Rate of non_werewolves and Vote Difficulity
"""
import glob
import os
import re
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
from utils import Utils
from metagpt.const import DEFAULT_WORKSPACE_ROOT, METAGPT_ROOT
from metagpt.environment.werewolf.const import RoleType
class Vote:
"""Vote Evaluation"""
def __init__(self):
self.OUT_PATH = DEFAULT_WORKSPACE_ROOT / "outputs"
os.makedirs(self.OUT_PATH, exist_ok=True)
self.SUB_FOLDER_LIST = ["01-10", "11-20", "21-30"]
def _get_log_fileslist(self, IN_PATH) -> list[str]:
files_list = []
for SUB_FOLDER in self.SUB_FOLDER_LIST:
files_list.extend(glob.glob(str(IN_PATH / SUB_FOLDER / "*.txt")))
return files_list
def extract_votes_from_logs(self, files_list: list):
for in_logfile in tqdm(files_list):
SUB_FOLDER = (Path(in_logfile).parent).stem
out_txtfile = self.OUT_PATH / "# {0}_{1}.txt".format(SUB_FOLDER, Path(in_logfile).stem)
Utils().pick_vote_log(in_logfile, out_txtfile)
votefiles_list = Utils().get_file_list(self.OUT_PATH)
return votefiles_list
@staticmethod
def parse_vote_text2chunks(text: str):
"""
parse each game vote log into text chunks
one chunk example:
['Player1', 'Player2', 'Player3', 'Player5', 'Player6']. Say ONLY: I vote to eliminate ...
Player1(Witch): 49 | I vote to eliminate Player5
Player2(Villager): 49 | I vote to eliminate Player5
Player3(Villager): 49 | I vote to eliminate Player5
Player5(Werewolf): 49 | I vote to eliminate Player6
Player6(Seer): 49 | I vote to eliminate Player5
"""
pattern = re.compile(r"""\[([^\]]+)\]. Say ONLY: I vote to eliminate ...""")
chunks = {}
chunk_id = 0
last_end = 0
for match in pattern.finditer(text):
start = match.start()
chunk = text[last_end:start]
chunks[f"vote_{chunk_id}"] = chunk.strip()
last_end = match.end()
chunk_id += 1
final_chunk = text[last_end:].strip()
if final_chunk:
chunks[f"vote_{chunk_id}"] = final_chunk
return chunks
def _vote_rate_players(self, text: str):
"""
# calculate the rate of goodteam vote werewolves
:example:
input:
['Player1', 'Player2', 'Player3', 'Player5', 'Player6']. Say ONLY: I vote to eliminate ...
Player1(Witch): 49 | I vote to eliminate Player5
Player2(Villager): 49 | I vote to eliminate Player5
Player3(Villager): 49 | I vote to eliminate Player5
Player5(Werewolf): 49 | I vote to eliminate Player6
Player6(Seer): 49 | I vote to eliminate Player5
output:
werewolves: ['Player5']
non_werewolves: ['Player1', 'Player2', 'Player3', 'Player6']
as you can see :Player2(Villager) and Player3(Villager) vote to eliminate Player5(Werewolf)
:return goodteam vote rateability: 100.00%
"""
pattern = re.compile(r"(\w+)\(([^\)]+)\): \d+ \| I vote to eliminate (\w+)")
# find all werewolves
werewolves = []
for match in pattern.finditer(text):
if match.group(2) == RoleType.WEREWOLF.value:
werewolves.append(match.group(1))
# find all non_werewolves
non_werewolves = []
for match in pattern.finditer(text):
if match.group(2) != RoleType.WEREWOLF.value:
non_werewolves.append(match.group(1))
num_non_werewolves = len(non_werewolves)
# count players other than werewolves made the correct votes
correct_votes = 0
for match in pattern.finditer(text):
if match.group(2) != RoleType.WEREWOLF.value and match.group(3) in werewolves:
correct_votes += 1
# cal the rateability of non_werewolves
rate = correct_votes / num_non_werewolves
good_vote_rate = round(rate, 2)
return {"good_vote_rate": good_vote_rate, "werewolves": werewolves, "non_werewolves": non_werewolves}
def get_goodteam_vote_rate(self, text: str) -> float:
goodteam_vote_rate = self._vote_rate_players(text)["good_vote_rate"]
return goodteam_vote_rate
def get_werewolves(self, text: str) -> list:
werewolves_list = self._vote_rate_players(text)["werewolves"]
return werewolves_list
def get_non_werewolves(self, text: str) -> list:
non_werewolves_list = self._vote_rate_players(text)["non_werewolves"]
return non_werewolves_list
def get_votewolf_difficulty(self, werewolves: list, non_werewolves: list) -> str:
num_living_wolfs = len(werewolves)
num_living_players = len(werewolves) + len(non_werewolves)
votewolf_difficulty = "_{0} / {1}".format(num_living_wolfs, num_living_players)
return votewolf_difficulty
def get_result_df(self, out_txtfile: str) -> pd.DataFrame:
"""
folder: sub folders for evals
file: evaluation file, each file represents one game
votes: the number of votes, eg. vote_1 represents the first vote of this game,
good_vote_rate:the rateability of a good person voting against a werewolf,
correct_votes / the total number of players other than werewolves
total_votes:the total number of votes cast
"""
with open(out_txtfile, "r") as out_file:
text = out_file.read()
chunks = self.parse_vote_text2chunks(text)
res = []
for k, v in chunks.items():
if v != "":
chunks_list = list(chunks.keys())
total_votes = len(chunks_list) - 1
werewolves = self.get_werewolves(v)
non_werewolves = self.get_non_werewolves(v)
good_vote_rate = self.get_goodteam_vote_rate(v)
votewolf_difficulty = self.get_votewolf_difficulty(werewolves, non_werewolves)
folder = Utils().filename_to_foldername(out_txtfile)
result = {
"folder": folder,
"file": Path(out_txtfile).stem + ".txt",
"vote_round": k,
"good_vote_rate": good_vote_rate,
"total_votes": total_votes,
"votewolf_difficulty": votewolf_difficulty,
}
res.append(result)
df = pd.DataFrame(res)
return df
def calc_avg_rate(self, IN_PATH) -> pd.DataFrame:
"""
get avg_rate for each game
avg_rate : the good_rate/total number of votes in the game
vote1_rate: First Round Voting Accuracy Rate
"""
infiles_list = self._get_log_fileslist(IN_PATH)
votefiles_list = self.extract_votes_from_logs(infiles_list)
df_list = [self._load_df_from_file(file) for file in votefiles_list]
combined_df = pd.concat(df_list, ignore_index=True)
# calculate the average good_vote_rate for each file
mean_rates = self._calculate_mean_rates(combined_df)
combined_df["avg_rate"] = combined_df["file"].map(mean_rates)
# calculate vote1 rate
vote1_rates = self._calc_vote1_rates(combined_df)
combined_df["vote1_rate"] = combined_df["folder"].map(vote1_rates.set_index("folder")["good_vote_rate"])
combined_df.loc[combined_df["vote_round"] != "vote_1", "vote1_rate"] = np.nan
combined_df["vote1_rate"] = combined_df["vote1_rate"].apply(self._format_rates)
combined_df["good_vote_rate"] = combined_df["good_vote_rate"].apply(self._format_rates)
combined_df["avg_rate"] = combined_df["avg_rate"].apply(self._format_rates)
combined_df.sort_values(["file"], ascending=True, inplace=True)
return combined_df
def _calc_vote1_rates(self, df):
df_vote1 = df[df["vote_round"] == "vote_1"]
vote1_rates = df_vote1.groupby("folder")["good_vote_rate"].mean().reset_index()
return vote1_rates
def _load_df_from_file(self, file):
return self.get_result_df(file)
def _calculate_mean_rates(self, df):
return df.groupby("file")["good_vote_rate"].mean()
def _format_rates(self, s):
return Utils().float_to_percent(s)
def get_eval_csv(self, IN_PATH, EVAL_RESULT):
"""
IN_PATH : parent folder of ["01-10", "11-20", "21-30"]
EVAL_RESULT : output csv file path
"""
combined_df = self.calc_avg_rate(IN_PATH)
combined_df.to_csv(EVAL_RESULT, index=False)
if __name__ == "__main__":
IN_PATH = METAGPT_ROOT / "examples/werewolf_game/evals"
EVAL_RESULT = DEFAULT_WORKSPACE_ROOT / "outputs" / "goodteam_vote_rate.csv"
Vote().get_eval_csv(IN_PATH, EVAL_RESULT)
|