Spaces:
Sleeping
Sleeping
import numpy as np | |
def pfsp(win_rates: np.ndarray, weighting: str) -> np.ndarray: | |
""" | |
Overview: | |
Prioritized Fictitious Self-Play algorithm. | |
Process win_rates with a weighting function to get priority, then calculate the selection probability of each. | |
Arguments: | |
- win_rates (:obj:`np.ndarray`): a numpy ndarray of win rates between one player and N opponents, shape(N) | |
- weighting (:obj:`str`): pfsp weighting function type, refer to ``weighting_func`` below | |
Returns: | |
- probs (:obj:`np.ndarray`): a numpy ndarray of probability at which one element is selected, shape(N) | |
""" | |
weighting_func = { | |
'squared': lambda x: (1 - x) ** 2, | |
'variance': lambda x: x * (1 - x), | |
} | |
if weighting in weighting_func.keys(): | |
fn = weighting_func[weighting] | |
else: | |
raise KeyError("invalid weighting arg: {} in pfsp".format(weighting)) | |
assert isinstance(win_rates, np.ndarray) | |
assert win_rates.shape[0] >= 1, win_rates.shape | |
# all zero win rates case, return uniform selection prob | |
if win_rates.sum() < 1e-8: | |
return np.full_like(win_rates, 1.0 / len(win_rates)) | |
fn_win_rates = fn(win_rates) | |
probs = fn_win_rates / fn_win_rates.sum() | |
return probs | |
def uniform(win_rates: np.ndarray) -> np.ndarray: | |
""" | |
Overview: | |
Uniform opponent selection algorithm. Select an opponent uniformly, regardless of historical win rates. | |
Arguments: | |
- win_rates (:obj:`np.ndarray`): a numpy ndarray of win rates between one player and N opponents, shape(N) | |
Returns: | |
- probs (:obj:`np.ndarray`): a numpy ndarray of uniform probability, shape(N) | |
""" | |
return np.full_like(win_rates, 1.0 / len(win_rates)) | |