metaboulie committed
Commit 8177ad2
2 Parent(s): e9e13d8 8bc43c8

Merge remote-tracking branch 'upstream/main' into fp/applicatives

.github/workflows/deploy.yml ADDED
@@ -0,0 +1,56 @@
1
+ name: Deploy to GitHub Pages
2
+
3
+ on:
4
+ push:
5
+ branches: ['main']
6
+ workflow_dispatch:
7
+
8
+ concurrency:
9
+ group: 'pages'
10
+ cancel-in-progress: false
11
+
12
+ env:
13
+ UV_SYSTEM_PYTHON: 1
14
+
15
+ jobs:
16
+ build:
17
+ runs-on: ubuntu-latest
18
+ steps:
19
+ - uses: actions/checkout@v4
20
+
21
+ - name: 🚀 Install uv
22
+ uses: astral-sh/setup-uv@v4
23
+
24
+ - name: 🐍 Set up Python
25
+ uses: actions/setup-python@v5
26
+ with:
27
+ python-version: 3.12
28
+
29
+ - name: 📦 Install dependencies
30
+ run: |
31
+ uv pip install marimo jinja2
32
+
33
+ - name: 🛠️ Export notebooks
34
+ run: |
35
+ python scripts/build.py
36
+
37
+ - name: 📤 Upload artifact
38
+ uses: actions/upload-pages-artifact@v3
39
+ with:
40
+ path: _site
41
+
42
+ deploy:
43
+ needs: build
44
+
45
+ permissions:
46
+ pages: write
47
+ id-token: write
48
+
49
+ environment:
50
+ name: github-pages
51
+ url: ${{ steps.deployment.outputs.page_url }}
52
+ runs-on: ubuntu-latest
53
+ steps:
54
+ - name: 🚀 Deploy to GitHub Pages
55
+ id: deployment
56
+ uses: actions/deploy-pages@v4
.github/workflows/hf_sync.yml CHANGED
@@ -13,7 +13,33 @@ jobs:
13
  - uses: actions/checkout@v4
14
  with:
15
  fetch-depth: 0
16
  - name: Push to hub
17
  env:
18
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
19
- run: git push https://mylessss:$HF_TOKEN@huggingface.co/spaces/marimo-team/marimo-learn main
 
13
  - uses: actions/checkout@v4
14
  with:
15
  fetch-depth: 0
16
+
17
+ - name: Configure Git
18
+ run: |
19
+ git config --global user.name "GitHub Action"
20
+ git config --global user.email "[email protected]"
21
+
22
+ - name: Prepend frontmatter to README
23
+ run: |
24
+ if [ -f README.md ] && ! grep -q "^---" README.md; then
25
+ FRONTMATTER="---
26
+ title: marimo learn
27
+ emoji: 🧠
28
+ colorFrom: blue
29
+ colorTo: indigo
30
+ sdk: docker
31
+ sdk_version: \"latest\"
32
+ app_file: app.py
33
+ pinned: false
34
+ ---
35
+
36
+ "
37
+ echo "$FRONTMATTER$(cat README.md)" > README.md
38
+ git add README.md
39
+ git commit -m "Add HF frontmatter to README" || echo "No changes to commit"
40
+ fi
41
+
42
  - name: Push to hub
43
  env:
44
  HF_TOKEN: ${{ secrets.HF_TOKEN }}
45
+ run: git push -f https://mylessss:$HF_TOKEN@huggingface.co/spaces/marimo-team/marimo-learn main
.gitignore CHANGED
@@ -168,4 +168,7 @@ cython_debug/
168
  #.idea/
169
 
170
  # PyPI configuration file
171
- .pypirc
168
  #.idea/
169
 
170
  # PyPI configuration file
171
+ .pypirc
172
+
173
+ # Generated site content
174
+ _site/
Dockerfile CHANGED
@@ -1,9 +1,22 @@
1
  FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
2
 
3
  COPY _server/main.py _server/main.py
4
  COPY polars/ polars/
5
  COPY duckdb/ duckdb/
6
 
7
  RUN uv venv
8
  RUN uv export --script _server/main.py | uv pip install -r -
9
 
 
1
  FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim
2
 
3
+ WORKDIR /app
4
+
5
+ # Create a non-root user
6
+ RUN useradd -m appuser
7
+
8
+ # Copy application files
9
  COPY _server/main.py _server/main.py
10
  COPY polars/ polars/
11
  COPY duckdb/ duckdb/
12
 
13
+ # Set proper ownership
14
+ RUN chown -R appuser:appuser /app
15
+
16
+ # Switch to non-root user
17
+ USER appuser
18
+
19
+ # Create virtual environment and install dependencies
20
  RUN uv venv
21
  RUN uv export --script _server/main.py | uv pip install -r -
22
 
Makefile ADDED
@@ -0,0 +1,9 @@
1
+ install:
2
+ uv pip install marimo jinja2 markdown
3
+
4
+ build:
5
+ rm -rf _site
6
+ uv run scripts/build.py
7
+
8
+ serve:
9
+ uv run python -m http.server --directory _site
README.md CHANGED
@@ -58,6 +58,21 @@ Here's a contribution checklist:
58
  If you aren't comfortable adding a new notebook or course, you can also request
59
  what you'd like to see by [filing an issue](https://github.com/marimo-team/learn/issues/new?template=example_request.yaml).
60
 
61
  ## Community
62
 
63
  We're building a community. Come hang out with us!
 
58
  If you aren't comfortable adding a new notebook or course, you can also request
59
  what you'd like to see by [filing an issue](https://github.com/marimo-team/learn/issues/new?template=example_request.yaml).
60
 
61
+ ## Building and Previewing
62
+
63
+ The site is built using a Python script that exports marimo notebooks to HTML and generates an index page.
64
+
65
+ ```bash
66
+ # Build the site
67
+ python scripts/build.py --output-dir _site
68
+
69
+ # Preview the site (builds first)
70
+ python scripts/preview.py
71
+
72
+ # Preview without rebuilding
73
+ python scripts/preview.py --no-build
74
+ ```
75
+
76
  ## Community
77
 
78
  We're building a community. Come hang out with us!
_server/README.md CHANGED
@@ -5,11 +5,14 @@ This folder contains server code for hosting marimo apps.
5
  ## Running the server
6
 
7
  ```bash
 
8
  uv run --no-project main.py
9
  ```
10
 
11
  ## Building a Docker image
12
 
13
  ```bash
14
  docker build -t marimo-learn .
15
  ```
 
5
  ## Running the server
6
 
7
  ```bash
8
+ cd _server
9
  uv run --no-project main.py
10
  ```
11
 
12
  ## Building a Docker image
13
 
14
+ From the root directory, run:
15
+
16
  ```bash
17
  docker build -t marimo-learn .
18
  ```
polars/01_why_polars.py CHANGED
@@ -57,7 +57,7 @@ def _(mo):
57
  """
58
  Unlike Python's earliest DataFrame library Pandas, Polars was designed with performance and usability in mind — Polars can scale to large datasets with ease while maintaining a simple and intuitive API.
59
 
60
- Polars' performance is due to a number of factors, including its implementation and rust and its ability to perform operations in a parallelized and vectorized manner. It supports a wide range of data types, advanced query optimizations, and seamless integration with other Python libraries, making it a versatile tool for data scientists, engineers, and analysts. Additionally, Polars provides a lazy API for deferred execution, allowing users to optimize their workflows by chaining operations and executing them in a single pass.
61
 
62
  With its focus on speed, scalability, and ease of use, Polars is quickly becoming a go-to choice for data professionals looking to streamline their data processing pipelines and tackle large-scale data challenges.
63
  """
 
57
  """
58
  Unlike Python's earliest DataFrame library Pandas, Polars was designed with performance and usability in mind — Polars can scale to large datasets with ease while maintaining a simple and intuitive API.
59
 
60
+ Polars' performance is due to a number of factors, including its implementation in Rust and its ability to perform operations in a parallelized and vectorized manner. It supports a wide range of data types, advanced query optimizations, and seamless integration with other Python libraries, making it a versatile tool for data scientists, engineers, and analysts. Additionally, Polars provides a lazy API for deferred execution, allowing users to optimize their workflows by chaining operations and executing them in a single pass.
61
 
62
  With its focus on speed, scalability, and ease of use, Polars is quickly becoming a go-to choice for data professionals looking to streamline their data processing pipelines and tackle large-scale data challenges.
63
  """
probability/11_expectation.py ADDED
@@ -0,0 +1,860 @@
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "marimo",
5
+ # "matplotlib==3.10.0",
6
+ # "numpy==2.2.3",
7
+ # "scipy==1.15.2",
8
+ # ]
9
+ # ///
10
+
11
+ import marimo
12
+
13
+ __generated_with = "0.11.19"
14
+ app = marimo.App(width="medium", app_title="Expectation")
15
+
16
+
17
+ @app.cell(hide_code=True)
18
+ def _(mo):
19
+ mo.md(
20
+ r"""
21
+ # Expectation
22
+
23
+ _This notebook is a computational companion to ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part2/expectation/), by Stanford professor Chris Piech._
24
+
25
+ A random variable is fully represented by its Probability Mass Function (PMF), which describes each value the random variable can take on and the corresponding probabilities. However, a PMF can contain a lot of information. Sometimes it's useful to summarize a random variable with a single value!
26
+
27
+ The most common, and arguably the most useful, summary of a random variable is its **Expectation** (also called the expected value or mean).
28
+ """
29
+ )
30
+ return
31
+
32
+
33
+ @app.cell(hide_code=True)
34
+ def _(mo):
35
+ mo.md(
36
+ r"""
37
+ ## Definition of Expectation
38
+
39
+ The expectation of a random variable $X$, written $E[X]$, is the average of all the values the random variable can take on, each weighted by the probability that the random variable will take on that value.
40
+
41
+ $$E[X] = \sum_x x \cdot P(X=x)$$
42
+
43
+ Expectation goes by many other names: Mean, Weighted Average, Center of Mass, 1st Moment. All of these are calculated using the same formula.
44
+ """
45
+ )
46
+ return
47
+
48
+
49
+ @app.cell(hide_code=True)
50
+ def _(mo):
51
+ mo.md(
52
+ r"""
53
+ ## Intuition Behind Expectation
54
+
55
+ The expected value represents the long-run average value of a random variable over many independent repetitions of an experiment.
56
+
57
+ For example, if you roll a fair six-sided die many times and calculate the average of all rolls, that average will approach the expected value of 3.5 as the number of rolls increases.
58
+
59
+ Let's visualize this concept:
60
+ """
61
+ )
62
+ return
63
+
64
+
65
+ @app.cell(hide_code=True)
66
+ def _(np, plt):
67
+ # Set random seed for reproducibility
68
+ np.random.seed(42)
69
+
70
+ # Simulate rolling a die many times
71
+ exp_num_rolls = 1000
72
+ exp_die_rolls = np.random.randint(1, 7, size=exp_num_rolls)
73
+
74
+ # Calculate the running average
75
+ exp_running_avg = np.cumsum(exp_die_rolls) / np.arange(1, exp_num_rolls + 1)
76
+
77
+ # Create the plot
78
+ plt.figure(figsize=(10, 5))
79
+ plt.plot(range(1, exp_num_rolls + 1), exp_running_avg, label='Running Average')
80
+ plt.axhline(y=3.5, color='r', linestyle='--', label='Expected Value (3.5)')
81
+ plt.xlabel('Number of Rolls')
82
+ plt.ylabel('Average Value')
83
+ plt.title('Running Average of Die Rolls Approaching Expected Value')
84
+ plt.legend()
85
+ plt.grid(alpha=0.3)
86
+ plt.xscale('log') # Log scale to better see convergence
87
+
88
+ # Add annotations
89
+ plt.annotate('As the number of rolls increases,\nthe average approaches the expected value',
90
+ xy=(exp_num_rolls, exp_running_avg[-1]), xytext=(exp_num_rolls/3, 4),
91
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1.5))
92
+
93
+ plt.gca()
94
+ return exp_die_rolls, exp_num_rolls, exp_running_avg
95
+
96
+
97
+ @app.cell(hide_code=True)
98
+ def _(mo):
99
+ mo.md(r"""## Properties of Expectation""")
100
+ return
101
+
102
+
103
+ @app.cell(hide_code=True)
104
+ def _(mo):
105
+ mo.accordion(
106
+ {
107
+ "1. Linearity of Expectation": mo.md(
108
+ r"""
109
+ $$E[aX + b] = a \cdot E[X] + b$$
110
+
111
+ Where $a$ and $b$ are constants (not random variables).
112
+
113
+ This means that if you multiply a random variable by a constant, the expectation is multiplied by that constant. And if you add a constant to a random variable, the expectation increases by that constant.
114
+ """
115
+ ),
116
+ "2. Expectation of the Sum of Random Variables": mo.md(
117
+ r"""
118
+ $$E[X + Y] = E[X] + E[Y]$$
119
+
120
+ This is true regardless of the relationship between $X$ and $Y$. They can be dependent, and they can have different distributions. This also applies with more than two random variables:
121
+
122
+ $$E\left[\sum_{i=1}^n X_i\right] = \sum_{i=1}^n E[X_i]$$
123
+ """
124
+ ),
125
+ "3. Law of the Unconscious Statistician (LOTUS)": mo.md(
126
+ r"""
127
+ $$E[g(X)] = \sum_x g(x) \cdot P(X=x)$$
128
+
129
+ This allows us to calculate the expected value of a function $g(X)$ of a random variable $X$ when we know the probability distribution of $X$ but don't explicitly know the distribution of $g(X)$.
130
+
131
+ This theorem has the humorous name "Law of the Unconscious Statistician" (LOTUS) because it's so useful that you should be able to employ it unconsciously.
132
+ """
133
+ ),
134
+ "4. Expectation of a Constant": mo.md(
135
+ r"""
136
+ $$E[a] = a$$
137
+
138
+ Sometimes in proofs, you'll end up with the expectation of a constant (rather than a random variable). Since a constant doesn't change, its expected value is just the constant itself.
139
+ """
140
+ ),
141
+ }
142
+ )
143
+ return
144
+
145
+
146
+ @app.cell(hide_code=True)
147
+ def _(mo):
148
+ mo.md(
149
+ r"""
150
+ ## Calculating Expectation
151
+
152
+ Let's calculate the expected value for some common examples:
153
+
154
+ ### Example 1: Fair Die Roll
155
+
156
+ For a fair six-sided die, the PMF is:
157
+
158
+ $$P(X=x) = \frac{1}{6} \text{ for } x \in \{1, 2, 3, 4, 5, 6\}$$
159
+
160
+ The expected value is:
161
+
162
+ $$E[X] = 1 \cdot \frac{1}{6} + 2 \cdot \frac{1}{6} + 3 \cdot \frac{1}{6} + 4 \cdot \frac{1}{6} + 5 \cdot \frac{1}{6} + 6 \cdot \frac{1}{6} = \frac{21}{6} = 3.5$$
163
+
164
+ Let's implement this calculation in Python:
165
+ """
166
+ )
167
+ return
168
+
169
+
170
+ @app.cell
171
+ def _():
172
+ def calc_expectation_die():
173
+ """Calculate the expected value of a fair six-sided die roll."""
174
+ exp_die_values = range(1, 7)
175
+ exp_die_probs = [1/6] * 6
176
+
177
+ exp_die_expected = sum(x * p for x, p in zip(exp_die_values, exp_die_probs))
178
+ return exp_die_expected
179
+
180
+ exp_die_result = calc_expectation_die()
181
+ print(f"Expected value of a fair die roll: {exp_die_result}")
182
+ return calc_expectation_die, exp_die_result
183
+
184
+
185
+ @app.cell(hide_code=True)
186
+ def _(mo):
187
+ mo.md(
188
+ r"""
189
+ ### Example 2: Sum of Two Dice
190
+
191
+ Now let's calculate the expected value for the sum of two fair dice. First, we need the PMF:
192
+ """
193
+ )
194
+ return
195
+
196
+
197
+ @app.cell
198
+ def _():
199
+ def pmf_sum_two_dice(y_val):
200
+ """Returns the probability that the sum of two dice is y."""
201
+ # Count the number of ways to get sum y
202
+ exp_count = 0
203
+ for dice1 in range(1, 7):
204
+ for dice2 in range(1, 7):
205
+ if dice1 + dice2 == y_val:
206
+ exp_count += 1
207
+ return exp_count / 36 # There are 36 possible outcomes (6×6)
208
+
209
+ # Test the function for a few values
210
+ exp_test_values = [2, 7, 12]
211
+ for exp_test_y in exp_test_values:
212
+ print(f"P(Y = {exp_test_y}) = {pmf_sum_two_dice(exp_test_y)}")
213
+ return exp_test_values, exp_test_y, pmf_sum_two_dice
214
+
215
+
216
+ @app.cell
217
+ def _(pmf_sum_two_dice):
218
+ def calc_expectation_sum_two_dice():
219
+ """Calculate the expected value of the sum of two dice."""
220
+ exp_sum_two_dice = 0
221
+ # Sum of dice can take on the values 2 through 12
222
+ for exp_x in range(2, 13):
223
+ exp_pr_x = pmf_sum_two_dice(exp_x) # PMF gives P(sum is x)
224
+ exp_sum_two_dice += exp_x * exp_pr_x
225
+ return exp_sum_two_dice
226
+
227
+ exp_sum_result = calc_expectation_sum_two_dice()
228
+
229
+ # Round to 2 decimal places for display
230
+ exp_sum_result_rounded = round(exp_sum_result, 2)
231
+
232
+ print(f"Expected value of the sum of two dice: {exp_sum_result_rounded}")
233
+
234
+ # Let's also verify this with a direct calculation
235
+ exp_direct_calc = sum(x * pmf_sum_two_dice(x) for x in range(2, 13))
236
+ exp_direct_calc_rounded = round(exp_direct_calc, 2)
237
+
238
+ print(f"Direct calculation: {exp_direct_calc_rounded}")
239
+
240
+ # Verify that this equals 7
241
+ print(f"Is the expected value exactly 7? {abs(exp_sum_result - 7) < 1e-10}")
242
+ return (
243
+ calc_expectation_sum_two_dice,
244
+ exp_direct_calc,
245
+ exp_direct_calc_rounded,
246
+ exp_sum_result,
247
+ exp_sum_result_rounded,
248
+ )
249
+
250
+
251
+ @app.cell(hide_code=True)
252
+ def _(mo):
253
+ mo.md(
254
+ r"""
255
+ ### Visualizing Expectation
256
+
257
+ Let's visualize the expectation for the sum of two dice. The expected value is the "center of mass" of the PMF:
258
+ """
259
+ )
260
+ return
261
+
262
+
263
+ @app.cell(hide_code=True)
264
+ def _(plt, pmf_sum_two_dice):
265
+ # Create the visualization
266
+ exp_y_values = list(range(2, 13))
267
+ exp_probabilities = [pmf_sum_two_dice(y) for y in exp_y_values]
268
+
269
+ dice_fig, dice_ax = plt.subplots(figsize=(10, 5))
270
+ dice_ax.bar(exp_y_values, exp_probabilities, width=0.4)
271
+ dice_ax.axvline(x=7, color='r', linestyle='--', linewidth=2, label='Expected Value (7)')
272
+
273
+ dice_ax.set_xticks(exp_y_values)
274
+ dice_ax.set_xlabel('Sum of two dice (y)')
275
+ dice_ax.set_ylabel('Probability: P(Y = y)')
276
+ dice_ax.set_title('PMF of Sum of Two Dice with Expected Value')
277
+ dice_ax.grid(alpha=0.3)
278
+ dice_ax.legend()
279
+
280
+ # Add probability values on top of bars
281
+ for exp_i, exp_prob in enumerate(exp_probabilities):
282
+ dice_ax.text(exp_y_values[exp_i], exp_prob + 0.001, f'{exp_prob:.3f}', ha='center')
283
+
284
+ plt.tight_layout()
285
+ plt.gca()
286
+ return dice_ax, dice_fig, exp_i, exp_prob, exp_probabilities, exp_y_values
287
+
288
+
289
+ @app.cell(hide_code=True)
290
+ def _(mo):
291
+ mo.md(
292
+ r"""
293
+ ## Demonstrating the Properties of Expectation
294
+
295
+ Let's demonstrate some of these properties with examples:
296
+ """
297
+ )
298
+ return
299
+
300
+
301
+ @app.cell
302
+ def _(exp_die_result):
303
+ # Demonstrate linearity of expectation (1)
304
+ # E[aX + b] = a*E[X] + b
305
+
306
+ # For a die roll X with E[X] = 3.5
307
+ prop_a = 2
308
+ prop_b = 10
309
+
310
+ # Calculate E[2X + 10] using the property
311
+ prop_expected_using_property = prop_a * exp_die_result + prop_b
312
+ prop_expected_using_property_rounded = round(prop_expected_using_property, 2)
313
+
314
+ print(f"Using linearity property: E[{prop_a}X + {prop_b}] = {prop_a} * E[X] + {prop_b} = {prop_expected_using_property_rounded}")
315
+
316
+ # Calculate E[2X + 10] directly
317
+ prop_expected_direct = sum((prop_a * x + prop_b) * (1/6) for x in range(1, 7))
318
+ prop_expected_direct_rounded = round(prop_expected_direct, 2)
319
+
320
+ print(f"Direct calculation: E[{prop_a}X + {prop_b}] = {prop_expected_direct_rounded}")
321
+
322
+ # Verify they match
323
+ print(f"Do they match? {abs(prop_expected_using_property - prop_expected_direct) < 1e-10}")
324
+ return (
325
+ prop_a,
326
+ prop_b,
327
+ prop_expected_direct,
328
+ prop_expected_direct_rounded,
329
+ prop_expected_using_property,
330
+ prop_expected_using_property_rounded,
331
+ )
332
+
333
+
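The cell above demonstrates linearity (property 1); property 2, the expectation of a sum, is stated in the accordion but not demonstrated in this commit. A minimal standalone sketch (not part of the diff, standard library only) checking that $E[X+Y] = E[X] + E[Y]$ holds even when $Y$ is completely dependent on $X$:

```python
# Sketch: linearity of expectation holds even for dependent random variables.
# Here Y = 7 - X, so X and Y are perfectly (negatively) dependent.
from fractions import Fraction

die = range(1, 7)
p = Fraction(1, 6)

e_x = sum(x * p for x in die)                 # E[X]     = 7/2
e_y = sum((7 - x) * p for x in die)           # E[Y]     = 7/2
e_sum = sum((x + (7 - x)) * p for x in die)   # E[X + Y] = 7

assert e_sum == e_x + e_y
print(e_x, e_y, e_sum)  # 7/2 7/2 7
```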
334
+ @app.cell(hide_code=True)
335
+ def _(mo):
336
+ mo.md(
337
+ r"""
338
+ ### Law of the Unconscious Statistician (LOTUS)
339
+
340
+ Let's use LOTUS to calculate $E[X^2]$ for a die roll, which will be useful when we study variance:
341
+ """
342
+ )
343
+ return
344
+
345
+
346
+ @app.cell
347
+ def _():
348
+ # Calculate E[X^2] for a die roll using LOTUS (3)
349
+ lotus_die_values = range(1, 7)
350
+ lotus_die_probs = [1/6] * 6
351
+
352
+ # Using LOTUS: E[X^2] = sum(x^2 * P(X=x))
353
+ lotus_expected_x_squared = sum(x**2 * p for x, p in zip(lotus_die_values, lotus_die_probs))
354
+ lotus_expected_x_squared_rounded = round(lotus_expected_x_squared, 2)
355
+
356
+ expected_x_squared = 3.5**2
357
+ expected_x_squared_rounded = round(expected_x_squared, 2)
358
+
359
+ print(f"E[X^2] for a die roll = {lotus_expected_x_squared_rounded}")
360
+ print(f"(E[X])^2 for a die roll = {expected_x_squared_rounded}")
361
+ return (
362
+ expected_x_squared,
363
+ expected_x_squared_rounded,
364
+ lotus_die_probs,
365
+ lotus_die_values,
366
+ lotus_expected_x_squared,
367
+ lotus_expected_x_squared_rounded,
368
+ )
369
+
370
+
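As a cross-check on the LOTUS computation above (not part of the committed notebook), the distribution of $X^2$ can be built explicitly and its ordinary expectation compared with the LOTUS result; both routes give $91/6 \approx 15.17$ for a fair die:

```python
# Sketch: verify LOTUS by constructing the distribution of g(X) = X^2 directly.
from collections import defaultdict

pmf_x = {x: 1 / 6 for x in range(1, 7)}

# Route 1 (LOTUS): E[g(X)] = sum over x of g(x) * P(X = x)
lotus = sum(x**2 * p for x, p in pmf_x.items())

# Route 2: build the PMF of Y = X^2, then take the ordinary expectation of Y
pmf_y = defaultdict(float)
for x, p in pmf_x.items():
    pmf_y[x**2] += p
direct = sum(y * p for y, p in pmf_y.items())

assert abs(lotus - direct) < 1e-12
print(lotus, direct)  # both ~15.1667
```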
371
+ @app.cell(hide_code=True)
372
+ def _(mo):
373
+ mo.md(
374
+ r"""
375
+ /// Note
376
+ Note that $E[X^2] \neq (E[X])^2$
377
+ """
378
+ )
379
+ return
380
+
381
+
382
+ @app.cell(hide_code=True)
383
+ def _(mo):
384
+ mo.md(
385
+ r"""
386
+ ## Interactive Example
387
+
388
+ Let's explore how the expected value changes as we adjust the parameters of common probability distributions. This interactive visualization focuses specifically on the relationship between distribution parameters and expected values.
389
+
390
+ Use the controls below to select a distribution and adjust its parameters. The graph will show how the expected value changes across a range of parameter values.
391
+ """
392
+ )
393
+ return
394
+
395
+
396
+ @app.cell(hide_code=True)
397
+ def _(mo):
398
+ # Create UI elements for distribution selection
399
+ dist_selection = mo.ui.dropdown(
400
+ options=[
401
+ "bernoulli",
402
+ "binomial",
403
+ "geometric",
404
+ "poisson"
405
+ ],
406
+ value="bernoulli",
407
+ label="Select a distribution"
408
+ )
409
+ return (dist_selection,)
410
+
411
+
412
+ @app.cell(hide_code=True)
413
+ def _(dist_selection):
414
+ dist_selection.center()
415
+ return
416
+
417
+
418
+ @app.cell(hide_code=True)
419
+ def _(dist_description):
420
+ dist_description
421
+ return
422
+
423
+
424
+ @app.cell(hide_code=True)
425
+ def _(mo):
426
+ mo.md("""### Adjust Parameters""")
427
+ return
428
+
429
+
430
+ @app.cell(hide_code=True)
431
+ def _(controls):
432
+ controls
433
+ return
434
+
435
+
436
+ @app.cell(hide_code=True)
437
+ def _(
438
+ dist_selection,
439
+ lambda_range,
440
+ np,
441
+ param_lambda,
442
+ param_n,
443
+ param_p,
444
+ param_range,
445
+ plt,
446
+ ):
447
+ # Calculate expected values based on the selected distribution
448
+ if dist_selection.value == "bernoulli":
449
+ # Get parameter range for visualization
450
+ p_min, p_max = param_range.value
451
+ param_values = np.linspace(p_min, p_max, 100)
452
+
453
+ # E[X] = p for Bernoulli
454
+ expected_values = param_values
455
+ current_param = param_p.value
456
+ current_expected = round(current_param, 2)
457
+ x_label = "p (probability of success)"
458
+ title = "Expected Value of Bernoulli Distribution"
459
+ formula = "E[X] = p"
460
+
461
+ elif dist_selection.value == "binomial":
462
+ p_min, p_max = param_range.value
463
+ param_values = np.linspace(p_min, p_max, 100)
464
+
465
+ # E[X] = np for Binomial
466
+ n = int(param_n.value)
467
+ expected_values = [n * p for p in param_values]
468
+ current_param = param_p.value
469
+ current_expected = round(n * current_param, 2)
470
+ x_label = "p (probability of success)"
471
+ title = f"Expected Value of Binomial Distribution (n={n})"
472
+ formula = f"E[X] = n × p = {n} × p"
473
+
474
+ elif dist_selection.value == "geometric":
475
+ p_min, p_max = param_range.value
476
+ # Ensure p is not 0 for geometric distribution
477
+ p_min = max(0.01, p_min)
478
+ param_values = np.linspace(p_min, p_max, 100)
479
+
480
+ # E[X] = 1/p for Geometric
481
+ expected_values = [1/p for p in param_values]
482
+ current_param = param_p.value
483
+ current_expected = round(1 / current_param, 2)
484
+ x_label = "p (probability of success)"
485
+ title = "Expected Value of Geometric Distribution"
486
+ formula = "E[X] = 1/p"
487
+
488
+ else: # Poisson
489
+ lambda_min, lambda_max = lambda_range.value
490
+ param_values = np.linspace(lambda_min, lambda_max, 100)
491
+
492
+ # E[X] = lambda for Poisson
493
+ expected_values = param_values
494
+ current_param = param_lambda.value
495
+ current_expected = round(current_param, 2)
496
+ x_label = "λ (rate parameter)"
497
+ title = "Expected Value of Poisson Distribution"
498
+ formula = "E[X] = λ"
499
+
500
+ # Create the plot
501
+ dist_fig, dist_ax = plt.subplots(figsize=(10, 6))
502
+
503
+ # Plot the expected value function
504
+ dist_ax.plot(param_values, expected_values, 'b-', linewidth=2, label="Expected Value Function")
505
+
506
+ dist_ax.plot(current_param, current_expected, 'ro', markersize=10, label=f"Current Value: E[X] = {current_expected}")
507
+
508
+ dist_ax.hlines(current_expected, param_values[0], current_param, colors='r', linestyles='dashed')
509
+
510
+ dist_ax.vlines(current_param, 0, current_expected, colors='r', linestyles='dashed')
511
+
512
+ dist_ax.fill_between(param_values, 0, expected_values, alpha=0.2, color='blue')
513
+
514
+ dist_ax.set_xlabel(x_label, fontsize=12)
515
+ dist_ax.set_ylabel("Expected Value: E[X]", fontsize=12)
516
+ dist_ax.set_title(title, fontsize=14, fontweight='bold')
517
+ dist_ax.grid(True, alpha=0.3)
518
+
519
+ # Move legend to lower right to avoid overlap with formula
520
+ dist_ax.legend(loc='lower right', fontsize=10)
521
+
522
+ # Add formula text box in upper left
523
+ dist_props = dict(boxstyle='round', facecolor='white', alpha=0.8)
524
+ dist_ax.text(0.02, 0.95, formula, transform=dist_ax.transAxes, fontsize=12,
525
+ verticalalignment='top', bbox=dist_props)
526
+
527
+ if dist_selection.value == "geometric":
528
+ max_y = min(50, 2/max(0.01, param_values[0]))
529
+ dist_ax.set_ylim(0, max_y)
530
+ elif dist_selection.value == "binomial":
531
+ dist_ax.set_ylim(0, int(param_n.value) + 1)
532
+ else:
533
+ dist_ax.set_ylim(0, max(expected_values) * 1.1)
534
+
535
+ annotation_x = current_param + (param_values[-1] - param_values[0]) * 0.05
536
+ annotation_y = current_expected
537
+
538
+ # Adjust annotation position if it would go off the chart
539
+ if annotation_x > param_values[-1] * 0.9:
540
+ annotation_x = current_param - (param_values[-1] - param_values[0]) * 0.2
541
+
542
+ dist_ax.annotate(
543
+ f"Parameter: {current_param:.2f}\nE[X] = {current_expected}",
544
+ xy=(current_param, current_expected),
545
+ xytext=(annotation_x, annotation_y),
546
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1.5, alpha=0.7),
547
+ bbox=dist_props
548
+ )
549
+
550
+ plt.tight_layout()
551
+ plt.gca()
552
+ return (
553
+ annotation_x,
554
+ annotation_y,
555
+ current_expected,
556
+ current_param,
557
+ dist_ax,
558
+ dist_fig,
559
+ dist_props,
560
+ expected_values,
561
+ formula,
562
+ lambda_max,
563
+ lambda_min,
564
+ max_y,
565
+ n,
566
+ p_max,
567
+ p_min,
568
+ param_values,
569
+ title,
570
+ x_label,
571
+ )
572
+
573
+
574
+ @app.cell(hide_code=True)
575
+ def _(mo):
576
+ mo.md(
577
+ r"""
578
+ ## Expectation vs. Mode
579
+
580
+ The expected value (mean) of a random variable is not always the same as its most likely value (mode). Let's explore this with an example:
581
+ """
582
+ )
583
+ return
584
+
585
+
586
+ @app.cell(hide_code=True)
587
+ def _(np, plt, stats):
588
+ # Create a skewed distribution
589
+ skew_n = 10
590
+ skew_p = 0.25
591
+
592
+ # Binomial PMF
593
+ skew_x_values = np.arange(0, skew_n+1)
594
+ skew_pmf_values = stats.binom.pmf(skew_x_values, skew_n, skew_p)
595
+
596
+ # Find the mode (most likely value)
597
+ skew_mode = skew_x_values[np.argmax(skew_pmf_values)]
598
+
599
+ # Calculate the expected value
600
+ skew_expected = skew_n * skew_p
601
+ skew_expected_rounded = round(skew_expected, 2)
602
+
603
+ skew_fig, skew_ax = plt.subplots(figsize=(10, 5))
604
+ skew_ax.bar(skew_x_values, skew_pmf_values, alpha=0.7, width=0.4)
605
+
606
+ # Add vertical lines for mode and expected value
607
+ skew_ax.axvline(x=skew_mode, color='g', linestyle='--', linewidth=2,
608
+ label=f'Mode = {skew_mode} (Most likely value)')
609
+ skew_ax.axvline(x=skew_expected, color='r', linestyle='--', linewidth=2,
610
+ label=f'Expected Value = {skew_expected_rounded} (Mean)')
611
+
612
+ skew_ax.annotate('Mode', xy=(skew_mode, 0.05), xytext=(skew_mode-2.0, 0.1),
613
+ arrowprops=dict(facecolor='green', shrink=0.05, width=1.5), color='green')
614
+ skew_ax.annotate('Expected Value', xy=(skew_expected, 0.05), xytext=(skew_expected+1, 0.15),
615
+ arrowprops=dict(facecolor='red', shrink=0.05, width=1.5), color='red')
616
+
617
+ if skew_mode != int(skew_expected):
618
+ min_x = min(skew_mode, skew_expected)
619
+ max_x = max(skew_mode, skew_expected)
620
+ skew_ax.axvspan(min_x, max_x, alpha=0.2, color='purple')
621
+
622
+ # Add text explaining the difference
623
+ mid_x = (skew_mode + skew_expected) / 2
624
+ skew_ax.text(mid_x, max(skew_pmf_values) * 0.5,
625
+ f"Difference: {abs(skew_mode - skew_expected_rounded):.2f}",
626
+ ha='center', va='center', bbox=dict(facecolor='white', alpha=0.7))
627
+
628
+ skew_ax.set_xlabel('Number of Successes')
629
+ skew_ax.set_ylabel('Probability')
630
+ skew_ax.set_title(f'Binomial Distribution (n={skew_n}, p={skew_p})')
631
+ skew_ax.grid(alpha=0.3)
632
+ skew_ax.legend()
633
+
634
+ plt.tight_layout()
635
+ plt.gca()
636
+ return (
637
+ max_x,
638
+ mid_x,
639
+ min_x,
640
+ skew_ax,
641
+ skew_expected,
642
+ skew_expected_rounded,
643
+ skew_fig,
644
+ skew_mode,
645
+ skew_n,
646
+ skew_p,
647
+ skew_pmf_values,
648
+ skew_x_values,
649
+ )
650
+
651
+
652
+ @app.cell(hide_code=True)
653
+ def _(mo):
654
+ mo.md(
655
+ r"""
656
+ /// NOTE
657
+ For the sum of two dice we calculated earlier, we found the expected value to be exactly 7. In that case, 7 also happens to be the mode (most likely outcome) of the distribution. However, this is just a coincidence for this particular example!
658
+
659
+ As we can see from the binomial distribution above, the expected value (2.50) and the mode (2) are often different values (this is common in skewed distributions). The expected value represents the "center of mass" of the distribution, while the mode represents the most likely single outcome.
660
+ """
661
+ )
662
+ return
663
+
664
+
665
+ @app.cell(hide_code=True)
666
+ def _(mo):
667
+ mo.md(
668
+ r"""
669
+ ## 🤔 Test Your Understanding
670
+
671
+ Choose what you believe are the correct options in the questions below:
672
+
673
+ <details>
674
+ <summary>The expected value of a random variable is always one of the possible values the random variable can take.</summary>
675
+ ❌ False! The expected value is a weighted average and may not be a value the random variable can actually take. For example, the expected value of a fair die roll is 3.5, which is not a possible outcome.
676
+ </details>
677
+
678
+ <details>
679
+ <summary>If X and Y are independent random variables, then E[X·Y] = E[X]·E[Y].</summary>
680
+ ✅ True! For independent random variables, the expectation of their product equals the product of their expectations.
681
+ </details>
682
+
683
+ <details>
684
+ <summary>The expected value of a constant random variable (one that always takes the same value) is that constant.</summary>
685
+ ✅ True! If X = c with probability 1, then E[X] = c.
686
+ </details>
687
+
688
+ <details>
689
+ <summary>The expected value of the sum of two random variables is always the sum of their expected values, regardless of whether they are independent.</summary>
690
+ ✅ True! This is the linearity of expectation property: E[X + Y] = E[X] + E[Y], which holds regardless of dependence.
691
+ </details>
692
+ """
693
+ )
694
+ return
695
+
696
+
697
+ @app.cell(hide_code=True)
698
+ def _(mo):
699
+ mo.md(
700
+ r"""
701
+ ## Practical Applications of Expectation
702
+
703
+ Expected values show up everywhere - from investment decisions and insurance pricing to machine learning algorithms and game design. Engineers use them to predict system reliability, data scientists to understand customer behavior, and economists to model market outcomes. They're essential for risk assessment in project management and for optimizing resource allocation in operations research.
704
+ """
705
+ )
706
+ return
707
+
708
+
709
+ @app.cell(hide_code=True)
710
+ def _(mo):
711
+ mo.md(
712
+ r"""
713
+ ## Key Takeaways
714
+
715
+ Expectation gives us a single value that summarizes a random variable's central tendency - it's the weighted average of all possible outcomes, where the weights are probabilities. The linearity property makes expectations easy to work with, even for complex combinations of random variables. While a PMF gives the complete probability picture, expectation provides an essential summary that helps us make decisions under uncertainty. In our next notebook, we'll explore variance, which measures how spread out a random variable's values are around its expectation.
716
+ """
717
+ )
718
+ return
719
+
720
+
721
+ @app.cell(hide_code=True)
722
+ def _(mo):
723
+ mo.md(r"""#### Appendix (containing helper code)""")
724
+ return
725
+
726
+
727
+ @app.cell(hide_code=True)
728
+ def _():
729
+ import marimo as mo
730
+ return (mo,)
731
+
732
+
733
+ @app.cell(hide_code=True)
734
+ def _():
735
+ import matplotlib.pyplot as plt
736
+ import numpy as np
737
+ from scipy import stats
738
+ import collections
739
+ return collections, np, plt, stats
740
+
741
+
742
+ @app.cell(hide_code=True)
743
+ def _(dist_selection, mo):
744
+ # Parameter controls for probability-based distributions
745
+ param_p = mo.ui.slider(
746
+ start=0.01,
747
+ stop=0.99,
748
+ step=0.01,
749
+ value=0.5,
750
+ label="p (probability of success)",
751
+ full_width=True
752
+ )
753
+
754
+ # Parameter control for binomial distribution
755
+ param_n = mo.ui.slider(
756
+ start=1,
757
+ stop=50,
758
+ step=1,
759
+ value=10,
760
+ label="n (number of trials)",
761
+ full_width=True
762
+ )
763
+
764
+ # Parameter control for Poisson distribution
765
+ param_lambda = mo.ui.slider(
766
+ start=0.1,
767
+ stop=20,
768
+ step=0.1,
769
+ value=5,
770
+ label="λ (rate parameter)",
771
+ full_width=True
772
+ )
773
+
774
+ # Parameter range sliders for visualization
775
+ param_range = mo.ui.range_slider(
776
+ start=0,
777
+ stop=1,
778
+ step=0.01,
779
+ value=[0, 1],
780
+ label="Parameter range to visualize",
781
+ full_width=True
782
+ )
783
+
784
+ lambda_range = mo.ui.range_slider(
785
+ start=0,
786
+ stop=20,
787
+ step=0.1,
788
+ value=[0, 20],
789
+ label="λ range to visualize",
790
+ full_width=True
791
+ )
792
+
793
+ # Display appropriate controls based on the selected distribution
794
+ if dist_selection.value == "bernoulli":
795
+ controls = mo.hstack([param_p, param_range], justify="space-around")
796
+ elif dist_selection.value == "binomial":
797
+ controls = mo.hstack([param_p, param_n, param_range], justify="space-around")
798
+ elif dist_selection.value == "geometric":
799
+ controls = mo.hstack([param_p, param_range], justify="space-around")
800
+ else: # poisson
801
+ controls = mo.hstack([param_lambda, lambda_range], justify="space-around")
802
+ return controls, lambda_range, param_lambda, param_n, param_p, param_range
803
+
804
+
805
+ @app.cell(hide_code=True)
806
+ def _(dist_selection, mo):
807
+ # Create distribution descriptions based on selection
808
+ if dist_selection.value == "bernoulli":
809
+ dist_description = mo.md(
810
+ r"""
811
+ **Bernoulli Distribution**
812
+
813
+ A Bernoulli distribution models a single trial with two possible outcomes: success (1) or failure (0).
814
+
815
+ - Parameter: $p$ = probability of success
816
+ - Expected Value: $E[X] = p$
817
+ - Example: Flipping a coin once (p = 0.5 for a fair coin)
818
+ """
819
+ )
820
+ elif dist_selection.value == "binomial":
821
+ dist_description = mo.md(
822
+ r"""
823
+ **Binomial Distribution**
824
+
825
+ A Binomial distribution models the number of successes in $n$ independent trials.
826
+
827
+ - Parameters: $n$ = number of trials, $p$ = probability of success
828
+ - Expected Value: $E[X] = np$
829
+ - Example: Number of heads in 10 coin flips
830
+ """
831
+ )
832
+ elif dist_selection.value == "geometric":
833
+ dist_description = mo.md(
834
+ r"""
835
+ **Geometric Distribution**
836
+
837
+ A Geometric distribution models the number of trials until the first success.
838
+
839
+ - Parameter: $p$ = probability of success
840
+ - Expected Value: $E[X] = \frac{1}{p}$
841
+ - Example: Number of coin flips until first heads
842
+ """
843
+ )
844
+ else: # poisson
845
+ dist_description = mo.md(
846
+ r"""
847
+ **Poisson Distribution**
848
+
849
+ A Poisson distribution models the number of events occurring in a fixed interval.
850
+
851
+ - Parameter: $\lambda$ = average rate of events
852
+ - Expected Value: $E[X] = \lambda$
853
+ - Example: Number of emails received per hour
854
+ """
855
+ )
856
+ return (dist_description,)
857
+
858
+
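The closed-form expected values quoted in the distribution descriptions above can be sanity-checked against `scipy.stats`, which this notebook already depends on. A small sketch (not part of the commit) with arbitrarily chosen parameter values:

```python
# Sketch: check the closed-form expected values above against scipy.stats.
from scipy import stats

p, n, lam = 0.3, 10, 5.0

assert abs(stats.bernoulli.mean(p) - p) < 1e-9      # Bernoulli: E[X] = p
assert abs(stats.binom.mean(n, p) - n * p) < 1e-9   # Binomial:  E[X] = n*p
assert abs(stats.geom.mean(p) - 1 / p) < 1e-9       # Geometric: E[X] = 1/p
assert abs(stats.poisson.mean(lam) - lam) < 1e-9    # Poisson:   E[X] = lambda
print("closed-form means match scipy.stats")
```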
859
+ if __name__ == "__main__":
860
+ app.run()
probability/12_variance.py ADDED
@@ -0,0 +1,631 @@
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "marimo",
5
+ # "matplotlib==3.10.0",
6
+ # "numpy==2.2.3",
7
+ # "scipy==1.15.2",
8
+ # "wigglystuff==0.1.10",
9
+ # ]
10
+ # ///
11
+
12
+ import marimo
13
+
14
+ __generated_with = "0.11.20"
15
+ app = marimo.App(width="medium", app_title="Variance")
16
+
17
+
18
+ @app.cell(hide_code=True)
19
+ def _(mo):
20
+ mo.md(
21
+ r"""
22
+ # Variance
23
+
24
+ _This notebook is a computational companion to ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part2/variance/), by Stanford professor Chris Piech._
25
+
26
+ In our previous exploration of random variables, we learned about expectation - a measure of central tendency. However, knowing the average value alone doesn't tell us everything about a distribution. Consider these questions:
27
+
28
+ - How spread out are the values around the mean?
29
+ - How reliable is the expectation as a predictor of individual outcomes?
30
+ - How much do individual samples typically deviate from the average?
31
+
32
+ This is where **variance** comes in - it measures the spread or dispersion of a random variable around its expected value.
33
+ """
34
+ )
35
+ return
36
+
37
+
38
+ @app.cell(hide_code=True)
39
+ def _(mo):
40
+ mo.md(
41
+ r"""
42
+ ## Definition of Variance
43
+
44
+ The variance of a random variable $X$ with expected value $\mu = E[X]$ is defined as:
45
+
46
+ $$\text{Var}(X) = E[(X-\mu)^2]$$
47
+
48
+ This definition captures the average squared deviation from the mean. There's also an equivalent, often more convenient formula:
49
+
50
+ $$\text{Var}(X) = E[X^2] - (E[X])^2$$
51
+
52
+ /// tip
53
+ The second formula is usually easier to compute, as it only requires calculating $E[X^2]$ and $E[X]$, rather than working with deviations from the mean.
54
+ """
55
+ )
56
+ return
57
+
58
+
59
+ @app.cell(hide_code=True)
60
+ def _(mo):
61
+ mo.md(
62
+ r"""
63
+ ## Intuition Through Example
64
+
65
+ Let's look at a real-world example that illustrates why variance is important. Consider three different groups of graders evaluating assignments in a massive online course. Each grader has their own "grading distribution" - their pattern of assigning scores to work that deserves a 70/100.
66
+
67
+ The visualization below shows the probability distributions for three types of graders. Try clicking and dragging the blue numbers to adjust the parameters and see how they affect the variance.
68
+ """
69
+ )
70
+ return
71
+
72
+
73
+ @app.cell(hide_code=True)
74
+ def _(mo):
75
+ mo.md(
76
+ r"""
77
+ /// TIP
78
+ Try adjusting the blue numbers below to see how:
79
+
80
+ - Increasing spread increases variance
81
+ - The mixture ratio affects how many outliers appear in Grader C's distribution
82
+ - Changing the true grade shifts all distributions but maintains their relative variances
83
+ """
84
+ )
85
+ return
86
+
87
+
88
+ @app.cell(hide_code=True)
89
+ def _(controls):
90
+ controls
91
+ return
92
+
93
+
94
+ @app.cell(hide_code=True)
95
+ def _(
96
+ grader_a_spread,
97
+ grader_b_spread,
98
+ grader_c_mix,
99
+ np,
100
+ plt,
101
+ stats,
102
+ true_grade,
103
+ ):
104
+ # Create data for three grader distributions
105
+ _grader_x = np.linspace(40, 100, 200)
106
+
107
+ # Calculate actual variances
108
+ var_a = grader_a_spread.amount**2
109
+ var_b = grader_b_spread.amount**2
110
+ var_c = (1-grader_c_mix.amount) * 3**2 + \
111
+ grader_c_mix.amount * 8**2 # mixture of equal-mean normals: weighted average of the component variances
112
+
113
+ # Grader A: Wide spread around true grade
114
+ grader_a = stats.norm.pdf(_grader_x, loc=true_grade.amount, scale=grader_a_spread.amount)
115
+
116
+ # Grader B: Narrow spread around true grade
117
+ grader_b = stats.norm.pdf(_grader_x, loc=true_grade.amount, scale=grader_b_spread.amount)
118
+
119
+ # Grader C: Mixture of distributions
120
+ grader_c = (1-grader_c_mix.amount) * stats.norm.pdf(_grader_x, loc=true_grade.amount, scale=3) + \
121
+ grader_c_mix.amount * stats.norm.pdf(_grader_x, loc=true_grade.amount, scale=8)
122
+
123
+ grader_fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15, 5))
124
+
125
+ # Plot each distribution
126
+ ax1.fill_between(_grader_x, grader_a, alpha=0.3, color='green', label=f'Var ≈ {var_a:.2f}')
127
+ ax1.axvline(x=true_grade.amount, color='black', linestyle='--', label='True Grade')
128
+ ax1.set_title('Grader A: High Variance')
129
+ ax1.set_xlabel('Grade')
130
+ ax1.set_ylabel('Pr(G = g)')
131
+ ax1.set_ylim(0, max(grader_a)*1.1)
132
+
133
+ ax2.fill_between(_grader_x, grader_b, alpha=0.3, color='blue', label=f'Var ≈ {var_b:.2f}')
134
+ ax2.axvline(x=true_grade.amount, color='black', linestyle='--')
135
+ ax2.set_title('Grader B: Low Variance')
136
+ ax2.set_xlabel('Grade')
137
+ ax2.set_ylim(0, max(grader_b)*1.1)
138
+
139
+ ax3.fill_between(_grader_x, grader_c, alpha=0.3, color='purple', label=f'Var ≈ {var_c:.2f}')
140
+ ax3.axvline(x=true_grade.amount, color='black', linestyle='--')
141
+ ax3.set_title('Grader C: Mixed Distribution')
142
+ ax3.set_xlabel('Grade')
143
+ ax3.set_ylim(0, max(grader_c)*1.1)
144
+
145
+ # Add annotations to explain what's happening
146
+ ax1.annotate('Wide spread = high variance',
147
+ xy=(true_grade.amount, max(grader_a)*0.5),
148
+ xytext=(true_grade.amount-15, max(grader_a)*0.7),
149
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
150
+
151
+ ax2.annotate('Narrow spread = low variance',
152
+ xy=(true_grade.amount, max(grader_b)*0.5),
153
+ xytext=(true_grade.amount+8, max(grader_b)*0.7),
154
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
155
+
156
+ ax3.annotate('Mixture creates outliers',
157
+ xy=(true_grade.amount+15, grader_c[np.where(_grader_x >= true_grade.amount+15)[0][0]]),
158
+ xytext=(true_grade.amount+5, max(grader_c)*0.7),
159
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
160
+
161
+ # Add legends and adjust layout
162
+ for _ax in [ax1, ax2, ax3]:
163
+ _ax.legend()
164
+ _ax.grid(alpha=0.2)
165
+
166
+ plt.tight_layout()
167
+ plt.gca()
168
+ return (
169
+ ax1,
170
+ ax2,
171
+ ax3,
172
+ grader_a,
173
+ grader_b,
174
+ grader_c,
175
+ grader_fig,
176
+ var_a,
177
+ var_b,
178
+ var_c,
179
+ )
180
+
181
+
182
+ @app.cell(hide_code=True)
183
+ def _(mo):
184
+ mo.md(
185
+ r"""
186
+ /// note
187
+ All three distributions have the same expected value (the true grade), but they differ significantly in their spread:
188
+
189
+ - **Grader A** has high variance - grades vary widely from the true value
190
+ - **Grader B** has low variance - grades consistently stay close to the true value
191
+ - **Grader C** has a mixture distribution - mostly consistent but with occasional extreme values
192
+
193
+ This illustrates why variance is crucial: two distributions can have the same mean but behave very differently in practice.
194
+ """
195
+ )
196
+ return
197
+
198
+
199
+ @app.cell(hide_code=True)
200
+ def _(mo):
201
+ mo.md(
202
+ r"""
203
+ ## Computing Variance
204
+
205
+ Let's work through some concrete examples to understand how to calculate variance.
206
+
207
+ ### Example 1: Fair Die Roll
208
+
209
+ Consider rolling a fair six-sided die. We'll calculate its variance step by step:
210
+ """
211
+ )
212
+ return
213
+
214
+
215
+ @app.cell
216
+ def _(np):
217
+ # Define the die values and probabilities
218
+ die_values = np.array([1, 2, 3, 4, 5, 6])
219
+ die_probs = np.array([1/6] * 6)
220
+
221
+ # Calculate E[X]
222
+ expected_value = np.sum(die_values * die_probs)
223
+
224
+ # Calculate E[X^2]
225
+ expected_square = np.sum(die_values**2 * die_probs)
226
+
227
+ # Calculate Var(X) = E[X^2] - (E[X])^2
228
+ variance = expected_square - expected_value**2
229
+
230
+ # Calculate standard deviation
231
+ std_dev = np.sqrt(variance)
232
+
233
+ print(f"E[X] = {expected_value:.2f}")
234
+ print(f"E[X^2] = {expected_square:.2f}")
235
+ print(f"Var(X) = {variance:.2f}")
236
+ print(f"Standard Deviation = {std_dev:.2f}")
237
+ return (
238
+ die_probs,
239
+ die_values,
240
+ expected_square,
241
+ expected_value,
242
+ std_dev,
243
+ variance,
244
+ )
245
+
246
+
247
+ @app.cell(hide_code=True)
248
+ def _(mo):
249
+ mo.md(
250
+ r"""
251
+ /// NOTE
252
+ For a fair die:
253
+
254
+ - The expected value (3.50) tells us the average roll
255
+ - The variance (2.92) tells us how much typical rolls deviate from this average
256
+ - The standard deviation (1.71) gives us this spread in the original units
257
+ """
258
+ )
259
+ return
260
+
261
+
262
+ @app.cell(hide_code=True)
263
+ def _(mo):
264
+ mo.md(
265
+ r"""
266
+ ## Properties of Variance
267
+
268
+ Variance has several important properties that make it useful for analyzing random variables:
269
+
270
+ 1. **Non-negativity**: $\text{Var}(X) \geq 0$ for any random variable $X$
271
+ 2. **Variance of a constant**: $\text{Var}(c) = 0$ for any constant $c$
272
+ 3. **Scaling**: $\text{Var}(aX) = a^2\text{Var}(X)$ for any constant $a$
273
+ 4. **Translation**: $\text{Var}(X + b) = \text{Var}(X)$ for any constant $b$
274
+ 5. **Independence**: If $X$ and $Y$ are independent, then $\text{Var}(X + Y) = \text{Var}(X) + \text{Var}(Y)$
275
+
276
+ Let's verify a property with an example.
277
+ """
278
+ )
279
+ return
280
+
281
+
282
+ @app.cell(hide_code=True)
283
+ def _(mo):
284
+ mo.md(
285
+ r"""
286
+ ## Proof of Variance Formula
287
+
288
+ The equivalence of the two variance formulas is a fundamental result in probability theory. Here's the proof:
289
+
290
+ Starting with the definition $\text{Var}(X) = E[(X-\mu)^2]$ where $\mu = E[X]$:
291
+
292
+ \begin{align}
293
+ \text{Var}(X) &= E[(X-\mu)^2] \\
294
+ &= \sum_x(x-\mu)^2P(x) && \text{Definition of Expectation}\\
295
+ &= \sum_x (x^2 -2\mu x + \mu^2)P(x) && \text{Expanding the square}\\
296
+ &= \sum_x x^2P(x)- 2\mu \sum_x xP(x) + \mu^2 \sum_x P(x) && \text{Distributing the sum}\\
297
+ &= E[X^2]- 2\mu E[X] + \mu^2 && \text{Definition of expectation}\\
298
+ &= E[X^2]- 2(E[X])^2 + (E[X])^2 && \text{Since }\mu = E[X]\\
299
+ &= E[X^2]- (E[X])^2 && \text{Simplifying}
300
+ \end{align}
301
+
302
+ /// tip
303
+ This proof shows why the formula $\text{Var}(X) = E[X^2] - (E[X])^2$ is so useful - it's much easier to compute $E[X^2]$ and $E[X]$ separately than to work with deviations directly.
304
+ """
305
+ )
306
+ return
307
+
308
+
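A numeric companion to the proof above (not part of the committed notebook): computing $\text{Var}(X)$ for a fair die from the definition and from the shortcut formula gives the same value, $35/12 \approx 2.92$:

```python
# Sketch: the two variance formulas from the proof agree for a fair die.
import numpy as np

values = np.arange(1, 7)
probs = np.full(6, 1 / 6)

mu = np.sum(values * probs)
var_definition = np.sum((values - mu) ** 2 * probs)  # E[(X - mu)^2]
var_shortcut = np.sum(values**2 * probs) - mu**2     # E[X^2] - (E[X])^2

assert np.isclose(var_definition, var_shortcut)
print(var_definition, var_shortcut)  # both 2.9166...
```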
309
+ @app.cell
310
+ def _(die_probs, die_values, np):
311
+ # Demonstrate scaling property
312
+ a = 2 # Scale factor
313
+
314
+ # Original variance
315
+ original_var = np.sum(die_values**2 * die_probs) - (np.sum(die_values * die_probs))**2
316
+
317
+ # Scaled random variable variance
318
+ scaled_values = a * die_values
319
+ scaled_var = np.sum(scaled_values**2 * die_probs) - (np.sum(scaled_values * die_probs))**2
320
+
321
+ print(f"Original Variance: {original_var:.2f}")
322
+ print(f"Scaled Variance (a={a}): {scaled_var:.2f}")
323
+ print(f"a^2 * Original Variance: {a**2 * original_var:.2f}")
324
+ print(f"Property holds: {abs(scaled_var - a**2 * original_var) < 1e-10}")
325
+ return a, original_var, scaled_values, scaled_var
326
+
327
+
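The cell above verifies the scaling property; a similar standalone sketch (not part of the commit) covers the translation and independence properties by enumerating the joint distribution of two independent fair dice:

```python
# Sketch: check Var(X + b) = Var(X) and, for independent dice,
# Var(X + Y) = Var(X) + Var(Y), by enumerating the joint distribution.
import itertools
import numpy as np

values = np.arange(1, 7)
probs = np.full(6, 1 / 6)

def var_discrete(vals, ps):
    mu = np.sum(vals * ps)
    return np.sum(vals**2 * ps) - mu**2

var_x = var_discrete(values, probs)

# Translation: adding a constant shifts the mean but leaves the spread unchanged
assert np.isclose(var_discrete(values + 10, probs), var_x)

# Independence: all 36 (x, y) pairs of two fair dice are equally likely
sums = np.array([x + y for x, y in itertools.product(values, values)])
assert np.isclose(var_discrete(sums, np.full(36, 1 / 36)), 2 * var_x)
print("translation and independence properties hold")
```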
328
+ @app.cell
329
+ def _():
330
+ # DIY: verify the remaining properties (non-negativity, variance of a constant) in the same way
331
+ return
332
+
333
+
334
+ @app.cell(hide_code=True)
335
+ def _(mo):
336
+ mo.md(
337
+ r"""
338
+ ## Standard Deviation
339
+
340
+ While variance is mathematically convenient, it has one practical drawback: its units are squared. For example, if we're measuring grades (0-100), the variance is in "grade points squared." This makes it hard to interpret intuitively.
341
+
342
+ The **standard deviation**, denoted by $\sigma$ or $\text{SD}(X)$, is the square root of variance:
343
+
344
+ $$\sigma = \sqrt{\text{Var}(X)}$$
345
+
346
+ /// tip
347
+ Standard deviation is often more intuitive because it's in the same units as the original data. For a normal distribution, approximately:
348
+ - 68% of values fall within 1 standard deviation of the mean
349
+ - 95% of values fall within 2 standard deviations
350
+ - 99.7% of values fall within 3 standard deviations
351
+ """
352
+ )
353
+ return
354
+
355
+
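The 68-95-99.7 figures quoted above follow directly from the standard normal CDF and do not depend on the particular mean or standard deviation; a quick check (not part of the commit) using `scipy.stats`:

```python
# Sketch: the "68-95-99.7" coverages come straight from the standard normal CDF,
# so they hold for any mean and standard deviation.
from scipy import stats

for k in (1, 2, 3):
    coverage = stats.norm.cdf(k) - stats.norm.cdf(-k)  # P(|Z| <= k)
    print(f"within {k} sigma: {coverage:.4f}")
# within 1 sigma: 0.6827
# within 2 sigma: 0.9545
# within 3 sigma: 0.9973
```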
356
+ @app.cell(hide_code=True)
357
+ def _(controls1):
358
+ controls1
359
+ return
360
+
361
+
362
+ @app.cell(hide_code=True)
363
+ def _(TangleSlider, mo):
364
+ normal_mean = mo.ui.anywidget(TangleSlider(
365
+ amount=0,
366
+ min_value=-5,
367
+ max_value=5,
368
+ step=0.5,
369
+ digits=1,
370
+ suffix=" units"
371
+ ))
372
+
373
+ normal_std = mo.ui.anywidget(TangleSlider(
374
+ amount=1,
375
+ min_value=0.1,
376
+ max_value=3,
377
+ step=0.1,
378
+ digits=1,
379
+ suffix=" units"
380
+ ))
381
+
382
+ # Create a grid layout for the controls
383
+ controls1 = mo.vstack([
384
+ mo.md("### Interactive Normal Distribution"),
385
+ mo.hstack([
386
+ mo.md("Adjust the parameters to see how standard deviation affects the shape of the distribution:"),
387
+ ]),
388
+ mo.hstack([
389
+ mo.md("Mean (μ): "),
390
+ normal_mean,
391
+ mo.md(" Standard deviation (σ): "),
392
+ normal_std
393
+ ], justify="start"),
394
+ ])
395
+ return controls1, normal_mean, normal_std
396
+
397
+
398
+ @app.cell(hide_code=True)
399
+ def _(normal_mean, normal_std, np, plt, stats):
400
+ # data for normal distribution
401
+ _normal_x = np.linspace(-10, 10, 1000)
402
+ _normal_y = stats.norm.pdf(_normal_x, loc=normal_mean.amount, scale=normal_std.amount)
403
+
404
+ # ranges for standard deviation intervals
405
+ one_sigma_left = normal_mean.amount - normal_std.amount
406
+ one_sigma_right = normal_mean.amount + normal_std.amount
407
+ two_sigma_left = normal_mean.amount - 2 * normal_std.amount
408
+ two_sigma_right = normal_mean.amount + 2 * normal_std.amount
409
+ three_sigma_left = normal_mean.amount - 3 * normal_std.amount
410
+ three_sigma_right = normal_mean.amount + 3 * normal_std.amount
411
+
412
+ # Create the plot
413
+ normal_fig, normal_ax = plt.subplots(figsize=(10, 6))
414
+
415
+ # Plot the distribution
416
+ normal_ax.plot(_normal_x, _normal_y, 'b-', linewidth=2)
417
+
418
+ # stdev intervals
419
+ normal_ax.fill_between(_normal_x, 0, _normal_y, where=(_normal_x >= one_sigma_left) & (_normal_x <= one_sigma_right),
420
+ alpha=0.3, color='red', label='68% (±1σ)')
421
+ normal_ax.fill_between(_normal_x, 0, _normal_y, where=(_normal_x >= two_sigma_left) & (_normal_x <= two_sigma_right),
422
+ alpha=0.2, color='green', label='95% (±2σ)')
423
+ normal_ax.fill_between(_normal_x, 0, _normal_y, where=(_normal_x >= three_sigma_left) & (_normal_x <= three_sigma_right),
424
+ alpha=0.1, color='blue', label='99.7% (±3σ)')
425
+
426
+ # vertical lines for the mean and standard deviations
427
+ normal_ax.axvline(x=normal_mean.amount, color='black', linestyle='-', linewidth=1.5, label='Mean (μ)')
428
+ normal_ax.axvline(x=one_sigma_left, color='red', linestyle='--', linewidth=1)
429
+ normal_ax.axvline(x=one_sigma_right, color='red', linestyle='--', linewidth=1)
430
+ normal_ax.axvline(x=two_sigma_left, color='green', linestyle='--', linewidth=1)
431
+ normal_ax.axvline(x=two_sigma_right, color='green', linestyle='--', linewidth=1)
432
+
433
+ # annotations
434
+ normal_ax.annotate(f'μ = {normal_mean.amount:.2f}',
435
+ xy=(normal_mean.amount, max(_normal_y)*0.5),
436
+ xytext=(normal_mean.amount + 0.5, max(_normal_y)*0.8),
437
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
438
+
439
+ normal_ax.annotate(f'σ = {normal_std.amount:.2f}',
440
+ xy=(one_sigma_right, stats.norm.pdf(one_sigma_right, loc=normal_mean.amount, scale=normal_std.amount)),
441
+ xytext=(one_sigma_right + 0.5, max(_normal_y)*0.6),
442
+ arrowprops=dict(facecolor='red', shrink=0.05, width=1))
443
+
444
+ # labels and title
445
+ normal_ax.set_xlabel('Value')
446
+ normal_ax.set_ylabel('Probability Density')
447
+ normal_ax.set_title(f'Normal Distribution with μ = {normal_mean.amount:.2f} and σ = {normal_std.amount:.2f}')
448
+
449
+ # legend and grid
450
+ normal_ax.legend()
451
+ normal_ax.grid(alpha=0.3)
452
+
453
+ plt.tight_layout()
454
+ plt.gca()
455
+ return (
456
+ normal_ax,
457
+ normal_fig,
458
+ one_sigma_left,
459
+ one_sigma_right,
460
+ three_sigma_left,
461
+ three_sigma_right,
462
+ two_sigma_left,
463
+ two_sigma_right,
464
+ )
465
+
466
+
467
+ @app.cell(hide_code=True)
468
+ def _(mo):
469
+ mo.md(
470
+ r"""
471
+ /// tip
472
+ The interactive visualization above demonstrates how standard deviation (σ) affects the shape of a normal distribution:
473
+
474
+ - The **red region** covers μ ± 1σ, containing approximately 68% of the probability
475
+ - The **green region** covers μ ± 2σ, containing approximately 95% of the probability
476
+ - The **blue region** covers μ ± 3σ, containing approximately 99.7% of the probability
477
+
478
+ This is known as the "68-95-99.7 rule" or the "empirical rule" and is a useful heuristic for understanding the spread of data.
479
+ """
480
+ )
481
+ return
482
+
483
+
484
+ @app.cell(hide_code=True)
485
+ def _(mo):
486
+ mo.md(
487
+ r"""
488
+ ## 🤔 Test Your Understanding
489
+
490
+ Choose what you believe are the correct options in the questions below:
491
+
492
+ <details>
493
+ <summary>The variance of a random variable can be negative.</summary>
494
+ ❌ False! Variance is defined as an expected value of squared deviations, and squares are always non-negative.
495
+ </details>
496
+
497
+ <details>
498
+ <summary>If X and Y are independent random variables, then Var(X + Y) = Var(X) + Var(Y).</summary>
499
+ ✅ True! This is one of the key properties of variance for independent random variables.
500
+ </details>
501
+
502
+ <details>
503
+ <summary>Multiplying a random variable by 2 multiplies its variance by 2.</summary>
504
+ ❌ False! Multiplying a random variable by a constant a multiplies its variance by a². So multiplying by 2 multiplies variance by 4.
505
+ </details>
506
+
507
+ <details>
508
+ <summary>Standard deviation is always equal to the square root of variance.</summary>
509
+ ✅ True! By definition, standard deviation σ = √Var(X).
510
+ </details>
511
+
512
+ <details>
513
+ <summary>If Var(X) = 0, then X must be a constant.</summary>
514
+ ✅ True! Zero variance means there is no spread around the mean, so X can only take one value.
515
+ </details>
516
+ """
517
+ )
518
+ return
519
+
520
+
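The independence and scaling facts in the quiz above are easy to verify numerically. A minimal sketch (not part of the notebook; the distributions and sample sizes are arbitrary choices):

```python
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(0, 3, size=1_000_000)     # Var(X) = 9
y = rng.exponential(2, size=1_000_000)   # Var(Y) = 4, independent of X

print(np.var(x + y), np.var(x) + np.var(y))  # both ~13: Var(X+Y) = Var(X) + Var(Y)
print(np.var(2 * x), 4 * np.var(x))          # both ~36: Var(2X) = 4 Var(X)
```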
521
+ @app.cell(hide_code=True)
522
+ def _(mo):
523
+ mo.md(
524
+ r"""
525
+ ## Key Takeaways
526
+
527
+ Variance gives us a way to measure how spread out a random variable is around its mean. It's like the "uncertainty" in our expectation - a high variance means individual outcomes can differ widely from what we expect on average.
528
+
529
+ Standard deviation brings this measure back to the original units, making it easier to interpret. For grades, a standard deviation of 10 points means typical grades fall within about 10 points of the average.
530
+
531
+ Variance pops up everywhere - from weather forecasts (how reliable is the predicted temperature?) to financial investments (how risky is this stock?) to quality control (how consistent is our manufacturing process?).
532
+
533
+ In our next notebook, we'll explore more properties of random variables and see how they combine to form more complex distributions.
534
+ """
535
+ )
536
+ return
537
+
538
+
539
+ @app.cell(hide_code=True)
540
+ def _(mo):
541
+ mo.md(r"""Appendix (containing helper code):""")
542
+ return
543
+
544
+
545
+ @app.cell(hide_code=True)
546
+ def _():
547
+ import marimo as mo
548
+ return (mo,)
549
+
550
+
551
+ @app.cell(hide_code=True)
552
+ def _():
553
+ import numpy as np
554
+ import scipy.stats as stats
555
+ import matplotlib.pyplot as plt
556
+ from wigglystuff import TangleSlider
557
+ return TangleSlider, np, plt, stats
558
+
559
+
560
+ @app.cell(hide_code=True)
561
+ def _(TangleSlider, mo):
562
+ # Create interactive elements using TangleSlider for a more inline experience
563
+ true_grade = mo.ui.anywidget(TangleSlider(
564
+ amount=70,
565
+ min_value=50,
566
+ max_value=90,
567
+ step=5,
568
+ digits=0,
569
+ suffix=" points"
570
+ ))
571
+
572
+ grader_a_spread = mo.ui.anywidget(TangleSlider(
573
+ amount=10,
574
+ min_value=5,
575
+ max_value=20,
576
+ step=1,
577
+ digits=0,
578
+ suffix=" points"
579
+ ))
580
+
581
+ grader_b_spread = mo.ui.anywidget(TangleSlider(
582
+ amount=2,
583
+ min_value=1,
584
+ max_value=5,
585
+ step=0.5,
586
+ digits=1,
587
+ suffix=" points"
588
+ ))
589
+
590
+ grader_c_mix = mo.ui.anywidget(TangleSlider(
591
+ amount=0.2,
592
+ min_value=0,
593
+ max_value=1,
594
+ step=0.05,
595
+ digits=2,
596
+ suffix=" proportion"
597
+ ))
598
+ return grader_a_spread, grader_b_spread, grader_c_mix, true_grade
599
+
600
+
601
+ @app.cell(hide_code=True)
602
+ def _(grader_a_spread, grader_b_spread, grader_c_mix, mo, true_grade):
603
+ # Create a grid layout for the interactive controls
604
+ controls = mo.vstack([
605
+ mo.md("### Adjust Parameters to See How Variance Changes"),
606
+ mo.hstack([
607
+ mo.md("**True grade:** The correct score that should be assigned is "),
608
+ true_grade,
609
+ mo.md(" out of 100.")
610
+ ], justify="start"),
611
+ mo.hstack([
612
+ mo.md("**Grader A:** Has a wide spread with standard deviation of "),
613
+ grader_a_spread,
614
+ mo.md(" points.")
615
+ ], justify="start"),
616
+ mo.hstack([
617
+ mo.md("**Grader B:** Has a narrow spread with standard deviation of "),
618
+ grader_b_spread,
619
+ mo.md(" points.")
620
+ ], justify="start"),
621
+ mo.hstack([
622
+ mo.md("**Grader C:** Has a mixture distribution with "),
623
+ grader_c_mix,
624
+ mo.md(" proportion of outliers.")
625
+ ], justify="start"),
626
+ ])
627
+ return (controls,)
628
+
629
+
630
+ if __name__ == "__main__":
631
+ app.run()
probability/13_bernoulli_distribution.py ADDED
@@ -0,0 +1,427 @@
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "marimo",
5
+ # "matplotlib==3.10.0",
6
+ # "numpy==2.2.3",
7
+ # "scipy==1.15.2",
8
+ # ]
9
+ # ///
10
+
11
+ import marimo
12
+
13
+ __generated_with = "0.11.22"
14
+ app = marimo.App(width="medium", app_title="Bernoulli Distribution")
15
+
16
+
17
+ @app.cell(hide_code=True)
18
+ def _(mo):
19
+ mo.md(
20
+ r"""
21
+ # Bernoulli Distribution
22
+
23
+ _This notebook is a computational companion to ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part2/bernoulli/), by Stanford professor Chris Piech._
24
+
25
+ ## Parametric Random Variables
26
+
27
+ There are many classic and commonly-seen random variable abstractions that show up in the world of probability. At this point, we'll learn about several of the most significant parametric discrete distributions.
28
+
29
+ When solving problems, if you can recognize that a random variable fits one of these formats, then you can use its pre-derived Probability Mass Function (PMF), expectation, variance, and other properties. Random variables of this sort are called **parametric random variables**. If you can argue that a random variable falls under one of the studied parametric types, you simply need to provide parameters.
30
+
31
+ > A good analogy is a `class` in programming. Creating a parametric random variable is very similar to calling a constructor with input parameters.
32
+ """
33
+ )
34
+ return
35
+
36
+
37
+ @app.cell(hide_code=True)
38
+ def _(mo):
39
+ mo.md(
40
+ r"""
41
+ ## Bernoulli Random Variables
42
+
43
+ A **Bernoulli random variable** (also called a boolean or indicator random variable) is the simplest kind of parametric random variable. It can take on two values: 1 and 0.
44
+
45
+ It takes on a 1 if an experiment with probability $p$ resulted in success and a 0 otherwise.
46
+
47
+ Some example uses include:
48
+
49
+ - A coin flip (heads = 1, tails = 0)
50
+ - A random binary digit
51
+ - Whether a disk drive crashed
52
+ - Whether someone likes a Netflix movie
53
+
54
+ Here $p$ is the parameter, but different instances of Bernoulli random variables might have different values of $p$.
55
+ """
56
+ )
57
+ return
58
+
59
+
60
+ @app.cell(hide_code=True)
61
+ def _(mo):
62
+ mo.md(
63
+ r"""
64
+ ## Key Properties of a Bernoulli Random Variable
65
+
66
+ If $X$ is declared to be a Bernoulli random variable with parameter $p$, denoted $X \sim \text{Bern}(p)$, it has the following properties:
67
+ """
68
+ )
69
+ return
70
+
71
+
72
+ @app.cell
73
+ def _(stats):
74
+ # Define the Bernoulli distribution function
75
+ def Bern(p):
76
+ return stats.bernoulli(p)
77
+ return (Bern,)
78
+
79
+
80
+ @app.cell(hide_code=True)
81
+ def _(mo):
82
+ mo.md(
83
+ r"""
84
+ ## Bernoulli Distribution Properties
85
+
86
+ $\begin{array}{lll}
87
+ \text{Notation:} & X \sim \text{Bern}(p) \\
88
+ \text{Description:} & \text{A boolean variable that is 1 with probability } p \\
89
+ \text{Parameters:} & p, \text{ the probability that } X = 1 \\
90
+ \text{Support:} & x \text{ is either 0 or 1} \\
91
+ \text{PMF equation:} & P(X = x) =
92
+ \begin{cases}
93
+ p & \text{if }x = 1\\
94
+ 1-p & \text{if }x = 0
95
+ \end{cases} \\
96
+ \text{PMF (smooth):} & P(X = x) = p^x(1-p)^{1-x} \\
97
+ \text{Expectation:} & E[X] = p \\
98
+ \text{Variance:} & \text{Var}(X) = p(1-p) \\
99
+ \end{array}$
100
+ """
101
+ )
102
+ return
103
+
104
+
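The expectation and variance rows of this table can be cross-checked directly against `scipy.stats.bernoulli`; a minimal sketch with an arbitrary example value of $p$:

```python
from scipy import stats

p = 0.3                      # arbitrary example parameter
X = stats.bernoulli(p)
print(X.pmf(1), X.pmf(0))    # 0.3, 0.7
print(X.mean(), X.var())     # 0.3, 0.21, i.e. p and p*(1-p)
```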
105
+ @app.cell(hide_code=True)
106
+ def _(mo, p_slider):
107
+ # Visualization of the Bernoulli PMF
108
+ _p = p_slider.value
109
+
110
+ # Values for PMF
111
+ values = [0, 1]
112
+ probabilities = [1 - _p, _p]
113
+
114
+ # Relevant statistics
115
+ expected_value = _p
116
+ variance = _p * (1 - _p)
117
+
118
+ mo.md(f"""
119
+ ## PMF Graph for Bernoulli($p={_p:.2f}$)
120
+
121
+ Parameter $p$: {p_slider}
122
+
123
+ Expected value: $E[X] = {expected_value:.2f}$
124
+
125
+ Variance: $\\text{{Var}}(X) = {variance:.2f}$
126
+ """)
127
+ return expected_value, probabilities, values, variance
128
+
129
+
130
+ @app.cell(hide_code=True)
131
+ def _(expected_value, p_slider, plt, probabilities, values, variance):
132
+ # PMF
133
+ _p = p_slider.value
134
+ fig, ax = plt.subplots(figsize=(10, 6))
135
+
136
+ # Bar plot for PMF
137
+ ax.bar(values, probabilities, width=0.4, color='blue', alpha=0.7)
138
+
139
+ ax.set_xlabel('Values that X can take on')
140
+ ax.set_ylabel('Probability')
141
+ ax.set_title(f'PMF of Bernoulli Distribution with p = {_p:.2f}')
142
+
143
+ # x-axis limit
144
+ ax.set_xticks([0, 1])
145
+ ax.set_xlim(-0.5, 1.5)
146
+
147
+ # y-axis w/ some padding
148
+ ax.set_ylim(0, max(probabilities) * 1.1)
149
+
150
+ # Add expectation as vertical line
151
+ ax.axvline(x=expected_value, color='red', linestyle='--',
152
+ label=f'E[X] = {expected_value:.2f}')
153
+
154
+ # Add variance annotation
155
+ ax.text(0.5, max(probabilities) * 0.8,
156
+ f'Var(X) = {variance:.3f}',
157
+ horizontalalignment='center',
158
+ bbox=dict(facecolor='white', alpha=0.7))
159
+
160
+ ax.legend()
161
+ plt.tight_layout()
162
+ plt.gca()
163
+ return ax, fig
164
+
165
+
166
+ @app.cell(hide_code=True)
167
+ def _(mo):
168
+ mo.md(
169
+ r"""
170
+ ## Proof: Expectation of a Bernoulli
171
+
172
+ If $X$ is a Bernoulli with parameter $p$, $X \sim \text{Bern}(p)$:
173
+
174
+ \begin{align}
175
+ E[X] &= \sum_x x \cdot P(X=x) && \text{Definition of expectation} \\
176
+ &= 1 \cdot p + 0 \cdot (1-p) &&
177
+ X \text{ can take on values 0 and 1} \\
178
+ &= p && \text{Remove the 0 term}
179
+ \end{align}
180
+
181
+ ## Proof: Variance of a Bernoulli
182
+
183
+ If $X$ is a Bernoulli with parameter $p$, $X \sim \text{Bern}(p)$:
184
+
185
+ To compute variance, first compute $E[X^2]$:
186
+
187
+ \begin{align}
188
+ E[X^2]
189
+ &= \sum_x x^2 \cdot P(X=x) &&\text{LOTUS}\\
190
+ &= 0^2 \cdot (1-p) + 1^2 \cdot p\\
191
+ &= p
192
+ \end{align}
193
+
194
+ \begin{align}
195
+ \text{Var}(X)
196
+ &= E[X^2] - E[X]^2&& \text{Def of variance} \\
197
+ &= p - p^2 && \text{Substitute }E[X^2]=p, E[X] = p \\
198
+ &= p (1-p) && \text{Factor out }p
199
+ \end{align}
200
+ """
201
+ )
202
+ return
203
+
204
+
205
+ @app.cell(hide_code=True)
206
+ def _(mo):
207
+ mo.md(
208
+ r"""
209
+ ## Indicator Random Variable
210
+
211
+ > **Definition**: An indicator variable is a Bernoulli random variable which takes on the value 1 if an **underlying event occurs**, and 0 _otherwise_.
212
+
213
+ Indicator random variables are a convenient way to convert the "true/false" outcome of an event into a number. That number may be easier to incorporate into an equation.
214
+
215
+ A random variable $I$ is an indicator variable for an event $A$ if $I = 1$ when $A$ occurs and $I = 0$ if $A$ does not occur. Indicator random variables are Bernoulli random variables, with $p = P(A)$. $I_A$ is a common choice of name for an indicator random variable.
216
+
217
+ Here are some properties of indicator random variables:
218
+
219
+ - $P(I=1)=P(A)$
220
+ - $E[I]=P(A)$
221
+ """
222
+ )
223
+ return
224
+
225
+
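To make the indicator idea concrete, here is a small illustrative sketch (not from the notebook) where the event $A$ is "a fair die shows 5 or 6"; the average of the indicator estimates $P(A) = 1/3$:

```python
import numpy as np

rng = np.random.default_rng(1)
rolls = rng.integers(1, 7, size=100_000)   # fair six-sided die
indicator = (rolls >= 5).astype(int)       # I = 1 when A occurs, 0 otherwise
print(indicator.mean())                    # ~0.333, so E[I] is approximately P(A)
```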
226
+ @app.cell(hide_code=True)
227
+ def _(mo):
228
+ # Simulation of Bernoulli trials
229
+ mo.md(r"""
230
+ ## Simulation of Bernoulli Trials
231
+
232
+ Let's simulate Bernoulli trials to see the law of large numbers in action. We'll flip a biased coin repeatedly and observe how the proportion of successes approaches the true probability $p$.
233
+ """)
234
+
235
+ # UI element for simulation parameters
236
+ num_trials_slider = mo.ui.slider(10, 10000, value=1000, step=10, label="Number of trials")
237
+ p_sim_slider = mo.ui.slider(0.01, 0.99, value=0.65, step=0.01, label="Success probability (p)")
238
+ return num_trials_slider, p_sim_slider
239
+
240
+
241
+ @app.cell(hide_code=True)
242
+ def _(mo):
243
+ mo.md(r"""## Simulation""")
244
+ return
245
+
246
+
247
+ @app.cell(hide_code=True)
248
+ def _(mo, num_trials_slider, p_sim_slider):
249
+ mo.hstack([num_trials_slider, p_sim_slider], justify='space-around')
250
+ return
251
+
252
+
253
+ @app.cell(hide_code=True)
254
+ def _(np, num_trials_slider, p_sim_slider, plt):
255
+ # Bernoulli trials
256
+ _num_trials = num_trials_slider.value
257
+ p = p_sim_slider.value
258
+
259
+ # Random Bernoulli trials
260
+ trials = np.random.binomial(1, p, size=_num_trials)
261
+
262
+ # Cumulative proportion of successes
263
+ cumulative_mean = np.cumsum(trials) / np.arange(1, _num_trials + 1)
264
+
265
+ # Results
266
+ plt.figure(figsize=(10, 6))
267
+ plt.plot(range(1, _num_trials + 1), cumulative_mean, label='Proportion of successes')
268
+ plt.axhline(y=p, color='r', linestyle='--', label=f'True probability (p={p})')
269
+
270
+ plt.xscale('log') # Use log scale for better visualization
271
+ plt.xlabel('Number of trials')
272
+ plt.ylabel('Proportion of successes')
273
+ plt.title('Convergence of Sample Proportion to True Probability')
274
+ plt.legend()
275
+ plt.grid(True, alpha=0.3)
276
+
277
+ # Add annotation
278
+ plt.annotate('As the number of trials increases,\nthe proportion approaches p',
279
+ xy=(_num_trials, cumulative_mean[-1]),
280
+ xytext=(_num_trials/5, p + 0.1),
281
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
282
+
283
+ plt.tight_layout()
284
+ plt.gca()
285
+ return cumulative_mean, p, trials
286
+
287
+
288
+ @app.cell(hide_code=True)
289
+ def _(mo, np, trials):
290
+ # Calculate statistics from the simulation
291
+ num_successes = np.sum(trials)
292
+ num_trials = len(trials)
293
+ proportion = num_successes / num_trials
294
+
295
+ # Display the results
296
+ mo.md(f"""
297
+ ### Simulation Results
298
+
299
+ - Number of trials: {num_trials}
300
+ - Number of successes: {num_successes}
301
+ - Proportion of successes: {proportion:.4f}
302
+
303
+ This demonstrates how the sample proportion approaches the true probability $p$ as the number of trials increases.
304
+ """)
305
+ return num_successes, num_trials, proportion
306
+
307
+
308
+ @app.cell(hide_code=True)
309
+ def _(mo):
310
+ mo.md(
311
+ r"""
312
+ ## 🤔 Test Your Understanding
313
+
314
+ Pick which of these statements about Bernoulli random variables you think are correct:
315
+
316
+ /// details | The variance of a Bernoulli random variable is always less than or equal to 0.25
317
+ ✅ Correct! The variance $p(1-p)$ reaches its maximum value of 0.25 when $p = 0.5$.
318
+ ///
319
+
320
+ /// details | The expected value of a Bernoulli random variable must be either 0 or 1
321
+ ❌ Incorrect! The expected value is $p$, which can be any value between 0 and 1.
322
+ ///
323
+
324
+ /// details | If $X \sim \text{Bern}(0.3)$ and $Y \sim \text{Bern}(0.7)$, then $X$ and $Y$ have the same variance
325
+ ✅ Correct! $\text{Var}(X) = 0.3 \times 0.7 = 0.21$ and $\text{Var}(Y) = 0.7 \times 0.3 = 0.21$.
326
+ ///
327
+
328
+ /// details | Two independent coin flips can be modeled as the sum of two Bernoulli random variables
329
+ ✅ Correct! The sum would follow a Binomial distribution with $n=2$.
330
+ ///
331
+ """
332
+ )
333
+ return
334
+
335
+
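The first quiz item, that $p(1-p)$ never exceeds 0.25, can be eyeballed numerically with a one-line grid search; a tiny sketch:

```python
import numpy as np

p = np.linspace(0, 1, 1001)
var = p * (1 - p)
print(p[var.argmax()], var.max())  # 0.5 and 0.25
```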
336
+ @app.cell(hide_code=True)
337
+ def _(mo):
338
+ mo.md(
339
+ r"""
340
+ ## Applications of Bernoulli Random Variables
341
+
342
+ Bernoulli random variables are used in many real-world scenarios:
343
+
344
+ 1. **Quality Control**: Testing if a manufactured item is defective (1) or not (0)
345
+
346
+ 2. **A/B Testing**: Determining if a user clicks (1) or doesn't click (0) on a website button
347
+
348
+ 3. **Medical Testing**: Checking if a patient tests positive (1) or negative (0) for a disease
349
+
350
+ 4. **Election Modeling**: Modeling if a particular voter votes for candidate A (1) or not (0)
351
+
352
+ 5. **Financial Markets**: Modeling if a stock price goes up (1) or down (0) in a simplified model
353
+
354
+ Because Bernoulli random variables are parametric, as soon as you declare a random variable to be of type Bernoulli, you automatically know all of its pre-derived properties!
355
+ """
356
+ )
357
+ return
358
+
359
+
360
+ @app.cell(hide_code=True)
361
+ def _(mo):
362
+ mo.md(
363
+ r"""
364
+ ## Summary
365
+
366
+ And that's a wrap on Bernoulli distributions! We've learnt the simplest of all probability distributions — the one that only has two possible outcomes. Flip a coin, check if an email is spam, see if your blind date shows up — these are all Bernoulli trials with success probability $p$.
367
+
368
+ The beauty of Bernoulli is in its simplicity: just set $p$ (the probability of success) and you're good to go! The PMF gives us $P(X=1) = p$ and $P(X=0) = 1-p$, while expectation is simply $p$ and variance is $p(1-p)$. Oh, and when you're tracking whether specific events happen or not? That's an indicator random variable — just another Bernoulli in disguise!
369
+
370
+ Two key things to remember:
371
+
372
+ /// note
373
+ 💡 **Maximum Variance**: A Bernoulli's variance $p(1-p)$ reaches its maximum at $p=0.5$, making a fair coin the most "unpredictable" Bernoulli random variable.
374
+
375
+ 💡 **Instant Properties**: When you identify a random variable as Bernoulli, you instantly know all its properties—expectation, variance, PMF—without additional calculations.
376
+ ///
377
+
378
+ Next up: Binomial distribution—where we'll see what happens when we let Bernoulli trials have a party and add themselves together!
379
+ """
380
+ )
381
+ return
382
+
383
+
384
+ @app.cell(hide_code=True)
385
+ def _(mo):
386
+ mo.md(r"""#### Appendix (containing helper code for the notebook)""")
387
+ return
388
+
389
+
390
+ @app.cell
391
+ def _():
392
+ import marimo as mo
393
+ return (mo,)
394
+
395
+
396
+ @app.cell(hide_code=True)
397
+ def _():
398
+ from marimo import Html
399
+ return (Html,)
400
+
401
+
402
+ @app.cell(hide_code=True)
403
+ def _():
404
+ import numpy as np
405
+ import matplotlib.pyplot as plt
406
+ from scipy import stats
407
+ import math
408
+
409
+ # Set style for consistent visualizations
410
+ plt.style.use('seaborn-v0_8-whitegrid')
411
+ plt.rcParams['figure.figsize'] = [10, 6]
412
+ plt.rcParams['font.size'] = 12
413
+
414
+ # Set random seed for reproducibility
415
+ np.random.seed(42)
416
+ return math, np, plt, stats
417
+
418
+
419
+ @app.cell(hide_code=True)
420
+ def _(mo):
421
+ # Create a UI element for the parameter p
422
+ p_slider = mo.ui.slider(0.01, 0.99, value=0.65, step=0.01, label="Parameter p")
423
+ return (p_slider,)
424
+
425
+
426
+ if __name__ == "__main__":
427
+ app.run()
probability/14_binomial_distribution.py ADDED
@@ -0,0 +1,545 @@
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "marimo",
5
+ # "matplotlib==3.10.0",
6
+ # "numpy==2.2.4",
7
+ # "scipy==1.15.2",
8
+ # "altair==5.2.0",
9
+ # "wigglystuff==0.1.10",
10
+ # "pandas==2.2.3",
11
+ # ]
12
+ # ///
13
+
14
+ import marimo
15
+
16
+ __generated_with = "0.11.24"
17
+ app = marimo.App(width="medium", app_title="Binomial Distribution")
18
+
19
+
20
+ @app.cell(hide_code=True)
21
+ def _(mo):
22
+ mo.md(
23
+ r"""
24
+ # Binomial Distribution
25
+
26
+ _This notebook is a computational companion to ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part2/binomial/), by Stanford professor Chris Piech._
27
+
28
+ In this section, we will discuss the binomial distribution. To start, imagine the following example:
29
+
30
+ Consider $n$ independent trials of an experiment where each trial is a "success" with probability $p$. Let $X$ be the number of successes in $n$ trials.
31
+
32
+ This situation is truly common in the natural world, and as such, there has been a lot of research into such phenomena. Random variables like $X$ are called **binomial random variables**. If you can identify that a process fits this description, you can inherit many already proved properties such as the PMF formula, expectation, and variance!
33
+ """
34
+ )
35
+ return
36
+
37
+
38
+ @app.cell(hide_code=True)
39
+ def _(mo):
40
+ mo.md(
41
+ r"""
42
+ ## Binomial Random Variable Definition
43
+
44
+ $X \sim \text{Bin}(n, p)$ represents a binomial random variable where:
45
+
46
+ - $X$ is our random variable (number of successes)
47
+ - $\text{Bin}$ indicates it follows a binomial distribution
48
+ - $n$ is the number of trials
49
+ - $p$ is the probability of success in each trial
50
+
51
+ ```
52
+ X ~ Bin(n, p)
53
+ ↑ ↑ ↑
54
+ | | +-- Probability of
55
+ | | success on each
56
+ | | trial
57
+ | +-- Number of trials
58
+ |
59
+ Our random variable
60
+ is distributed
61
+ as a Binomial
62
+ ```
63
+
64
+ Here are a few examples of binomial random variables:
65
+
66
+ - Number of heads in $n$ coin flips
67
+ - Number of 1's in a randomly generated length-$n$ bit string
68
+ - Number of disk drives that crash in a cluster of 1000 computers, assuming disks crash independently
69
+ """
70
+ )
71
+ return
72
+
73
+
74
+ @app.cell(hide_code=True)
75
+ def _(mo):
76
+ mo.md(
77
+ r"""
78
+ ## Properties of Binomial Distribution
79
+
80
+ | Property | Formula |
81
+ |----------|---------|
82
+ | Notation | $X \sim \text{Bin}(n, p)$ |
83
+ | Description | Number of "successes" in $n$ identical, independent experiments each with probability of success $p$ |
84
+ | Parameters | $n \in \{0, 1, \dots\}$, the number of experiments<br>$p \in [0, 1]$, the probability that a single experiment gives a "success" |
85
+ | Support | $x \in \{0, 1, \dots, n\}$ |
86
+ | PMF equation | $P(X=x) = {n \choose x}p^x(1-p)^{n-x}$ |
87
+ | Expectation | $E[X] = n \cdot p$ |
88
+ | Variance | $\text{Var}(X) = n \cdot p \cdot (1-p)$ |
89
+
90
+ Let's explore how the binomial distribution changes with different parameters.
91
+ """
92
+ )
93
+ return
94
+
95
+
96
+ @app.cell(hide_code=True)
97
+ def _(TangleSlider, mo):
98
+ # Interactive elements using TangleSlider
99
+ n_slider = mo.ui.anywidget(TangleSlider(
100
+ amount=10,
101
+ min_value=1,
102
+ max_value=30,
103
+ step=1,
104
+ digits=0,
105
+ suffix=" trials"
106
+ ))
107
+
108
+ p_slider = mo.ui.anywidget(TangleSlider(
109
+ amount=0.5,
110
+ min_value=0.01,
111
+ max_value=0.99,
112
+ step=0.01,
113
+ digits=2,
114
+ suffix=" probability"
115
+ ))
116
+
117
+ # Grid layout for the interactive controls
118
+ controls = mo.vstack([
119
+ mo.md("### Adjust Parameters to See How Binomial Distribution Changes"),
120
+ mo.hstack([
121
+ mo.md("**Number of trials (n):** "),
122
+ n_slider
123
+ ], justify="start"),
124
+ mo.hstack([
125
+ mo.md("**Probability of success (p):** "),
126
+ p_slider
127
+ ], justify="start"),
128
+ ])
129
+ return controls, n_slider, p_slider
130
+
131
+
132
+ @app.cell(hide_code=True)
133
+ def _(controls):
134
+ controls
135
+ return
136
+
137
+
138
+ @app.cell(hide_code=True)
139
+ def _(n_slider, np, p_slider, plt, stats):
140
+ # Parameters from sliders
141
+ _n = int(n_slider.amount)
142
+ _p = p_slider.amount
143
+
144
+ # Calculate PMF
145
+ _x = np.arange(0, _n + 1)
146
+ _pmf = stats.binom.pmf(_x, _n, _p)
147
+
148
+ # Relevant stats
149
+ _mean = _n * _p
150
+ _variance = _n * _p * (1 - _p)
151
+ _std_dev = np.sqrt(_variance)
152
+
153
+ _fig, _ax = plt.subplots(figsize=(10, 6))
154
+
155
+ # Plot PMF as bars
156
+ _ax.bar(_x, _pmf, color='royalblue', alpha=0.7, label=f'PMF: P(X=k)')
157
+
158
+ # Add a line
159
+ _ax.plot(_x, _pmf, 'ro-', alpha=0.6, label='PMF line')
160
+
161
+ # Add vertical lines
162
+ _ax.axvline(x=_mean, color='green', linestyle='--', linewidth=2,
163
+ label=f'Mean: {_mean:.2f}')
164
+
165
+ # Shade the stdev region
166
+ _ax.axvspan(_mean - _std_dev, _mean + _std_dev, alpha=0.2, color='green',
167
+ label=f'±1 Std Dev: {_std_dev:.2f}')
168
+
169
+ # Add labels and title
170
+ _ax.set_xlabel('Number of Successes (k)')
171
+ _ax.set_ylabel('Probability: P(X=k)')
172
+ _ax.set_title(f'Binomial Distribution with n={_n}, p={_p:.2f}')
173
+
174
+ # Annotations
175
+ _ax.annotate(f'E[X] = {_mean:.2f}',
176
+ xy=(_mean, stats.binom.pmf(int(_mean), _n, _p)),
177
+ xytext=(_mean + 1, max(_pmf) * 0.8),
178
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
179
+
180
+ _ax.annotate(f'Var(X) = {_variance:.2f}',
181
+ xy=(_mean, stats.binom.pmf(int(_mean), _n, _p) / 2),
182
+ xytext=(_mean + 1, max(_pmf) * 0.6),
183
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
184
+
185
+ # Grid and legend
186
+ _ax.grid(alpha=0.3)
187
+ _ax.legend()
188
+
189
+ plt.tight_layout()
190
+ plt.gca()
191
+ return
192
+
193
+
194
+ @app.cell(hide_code=True)
195
+ def _(mo):
196
+ mo.md(
197
+ r"""
198
+ ## Relationship to Bernoulli Random Variables
199
+
200
+ One way to think of the binomial is as the sum of $n$ Bernoulli variables. Say that $Y_i$ is an indicator Bernoulli random variable which is 1 if experiment $i$ is a success. Then if $X$ is the total number of successes in $n$ experiments, $X \sim \text{Bin}(n, p)$:
201
+
202
+ $$X = \sum_{i=1}^n Y_i$$
203
+
204
+ Recall that the outcome of $Y_i$ will be 1 or 0, so one way to think of $X$ is as the sum of those 1s and 0s.
205
+ """
206
+ )
207
+ return
208
+
209
+
210
+ @app.cell(hide_code=True)
211
+ def _(mo):
212
+ mo.md(
213
+ r"""
214
+ ## Binomial Probability Mass Function (PMF)
215
+
216
+ The most important property to know about a binomial is its [Probability Mass Function](https://marimo.app/https://github.com/marimo-team/learn/blob/main/probability/10_probability_mass_function.py):
217
+
218
+ $$P(X=k) = {n \choose k}p^k(1-p)^{n-k}$$
219
+
220
+ ```
221
+ P(X = k) = (n) p^k(1-p)^(n-k)
222
+ ↑ (k)
223
+ | ↑
224
+ | +-- Binomial coefficient:
225
+ | number of ways to choose
226
+ | k successes from n trials
227
+ |
228
+ Probability that our
229
+ variable takes on the
230
+ value k
231
+ ```
232
+
233
+ Recall, we derived this formula in Part 1. There is a complete example on the probability of $k$ heads in $n$ coin flips, where each flip is heads with probability $p$.
234
+
235
+ To briefly review, if you think of each experiment as being distinct, then there are ${n \choose k}$ ways of permuting $k$ successes from $n$ experiments. For any of the mutually exclusive permutations, the probability of that permutation is $p^k \cdot (1-p)^{n-k}$.
236
+
237
+ The name binomial comes from the term ${n \choose k}$ which is formally called the binomial coefficient.
238
+ """
239
+ )
240
+ return
241
+
242
+
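The PMF formula above can be evaluated by hand with `math.comb` and compared against `scipy.stats.binom.pmf`; a small sketch using arbitrary example values of $n$, $p$, and $k$:

```python
import math
from scipy import stats

n, p, k = 10, 0.3, 4                                 # arbitrary example values
manual = math.comb(n, k) * p**k * (1 - p)**(n - k)
print(manual)                                        # ~0.2001
print(stats.binom.pmf(k, n, p))                      # same value from scipy
```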
243
+ @app.cell(hide_code=True)
244
+ def _(mo):
245
+ mo.md(
246
+ r"""
247
+ ## Expectation of Binomial
248
+
249
+ There is an easy way to calculate the expectation of a binomial and a hard way. The easy way is to leverage the fact that a binomial is the sum of Bernoulli indicator random variables $X = \sum_{i=1}^{n} Y_i$ where $Y_i$ is an indicator of whether the $i$-th experiment was a success: $Y_i \sim \text{Bernoulli}(p)$.
250
+
251
+ Since the [expectation of the sum](http://marimo.app/https://github.com/marimo-team/learn/blob/main/probability/11_expectation.py) of random variables is the sum of expectations, we can add the expectation, $E[Y_i] = p$, of each of the Bernoulli's:
252
+
253
+ \begin{align}
254
+ E[X] &= E\Big[\sum_{i=1}^{n} Y_i\Big] && \text{Since }X = \sum_{i=1}^{n} Y_i \\
255
+ &= \sum_{i=1}^{n}E[ Y_i] && \text{Expectation of sum} \\
256
+ &= \sum_{i=1}^{n}p && \text{Expectation of Bernoulli} \\
257
+ &= n \cdot p && \text{Sum $n$ times}
258
+ \end{align}
259
+
260
+ The hard way is to use the definition of expectation:
261
+
262
+ \begin{align}
263
+ E[X] &= \sum_{i=0}^n i \cdot P(X = i) && \text{Def of expectation} \\
264
+ &= \sum_{i=0}^n i \cdot {n \choose i} p^i(1-p)^{n-i} && \text{Sub in PMF} \\
265
+ & \cdots && \text{Many steps later} \\
266
+ &= n \cdot p
267
+ \end{align}
268
+ """
269
+ )
270
+ return
271
+
272
+
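The "easy way" above, treating a binomial as a sum of Bernoulli indicators, is also visible in simulation; a minimal sketch (the parameters and sample size are arbitrary):

```python
import numpy as np

rng = np.random.default_rng(2)
n, p = 12, 0.25                                # arbitrary example parameters
bernoullis = rng.random((200_000, n)) < p      # each column is a Bern(p) indicator
x = bernoullis.sum(axis=1)                     # each row sum is a Bin(n, p) draw
print(x.mean(), n * p)                         # ~3.0 and 3.0
```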
273
+ @app.cell(hide_code=True)
274
+ def _(mo):
275
+ mo.md(
276
+ r"""
277
+ ## Binomial Distribution in Python
278
+
279
+ As you might expect, you can use binomial distributions in code. The standardized library for binomials is `scipy.stats.binom`.
280
+
281
+ One of the most helpful methods that this package provides is a way to calculate the PMF. For example, say $n=5$, $p=0.6$ and you want to find $P(X=2)$, you could use the following code:
282
+ """
283
+ )
284
+ return
285
+
286
+
287
+ @app.cell
288
+ def _(stats):
289
+ # define variables for x, n, and p
290
+ _n = 5 # Integer value for n
291
+ _p = 0.6
292
+ _x = 2
293
+
294
+ # use scipy to compute the pmf
295
+ p_x = stats.binom.pmf(_x, _n, _p)
296
+
297
+ # use the probability for future work
298
+ print(f'P(X = {_x}) = {p_x:.4f}')
299
+ return (p_x,)
300
+
301
+
302
+ @app.cell(hide_code=True)
303
+ def _(mo):
304
+ mo.md(r"""Another particularly helpful function is the ability to generate a random sample from a binomial. For example, say $X$ represents the number of requests to a website. We can draw 100 samples from this distribution using the following code:""")
305
+ return
306
+
307
+
308
+ @app.cell
309
+ def _(n, p, stats):
310
+ n_int = int(n)
311
+
312
+ # samples from the binomial distribution
313
+ samples = stats.binom.rvs(n_int, p, size=100)
314
+
315
+ # Print the samples
316
+ print(samples)
317
+ return n_int, samples
318
+
319
+
320
+ @app.cell(hide_code=True)
321
+ def _(n_int, np, p, plt, samples, stats):
322
+ # Plot histogram of samples
323
+ plt.figure(figsize=(10, 5))
324
+ plt.hist(samples, bins=np.arange(-0.5, n_int+1.5, 1), alpha=0.7, color='royalblue',
325
+ edgecolor='black', density=True)
326
+
327
+ # Overlay the PMF
328
+ x_values = np.arange(0, n_int+1)
329
+ pmf_values = stats.binom.pmf(x_values, n_int, p)
330
+ plt.plot(x_values, pmf_values, 'ro-', ms=8, label='Theoretical PMF')
331
+
332
+ # Add labels and title
333
+ plt.xlabel('Number of Successes')
334
+ plt.ylabel('Relative Frequency / Probability')
335
+ plt.title(f'Histogram of 100 Samples from Bin({n_int}, {p})')
336
+ plt.legend()
337
+ plt.grid(alpha=0.3)
338
+
339
+ # Annotate
340
+ plt.annotate('Sample mean: %.2f' % np.mean(samples),
341
+ xy=(0.7, 0.9), xycoords='axes fraction',
342
+ bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.3))
343
+ plt.annotate('Theoretical mean: %.2f' % (n_int*p),
344
+ xy=(0.7, 0.8), xycoords='axes fraction',
345
+ bbox=dict(boxstyle='round,pad=0.5', fc='lightgreen', alpha=0.3))
346
+
347
+ plt.tight_layout()
348
+ plt.gca()
349
+ return pmf_values, x_values
350
+
351
+
352
+ @app.cell(hide_code=True)
353
+ def _(mo):
354
+ mo.md(
355
+ r"""
356
+ You might be wondering what a random sample is! A random sample is a randomly chosen assignment for our random variable. Above we have 100 such assignments. The probability that value $k$ is chosen is given by the PMF: $P(X=k)$.
357
+
358
+ There are also functions for getting the mean, the variance, and more. You can read the [scipy.stats.binom documentation](https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.binom.html), especially the list of methods.
359
+ """
360
+ )
361
+ return
362
+
363
+
364
+ @app.cell(hide_code=True)
365
+ def _(mo):
366
+ mo.md(
367
+ r"""
368
+ ## Interactive Exploration of Binomial vs. Negative Binomial
369
+
370
+ The standard binomial distribution is a special case of a broader family of distributions. One related distribution is the negative binomial, which can model count data with overdispersion (where the variance is larger than the mean).
371
+
372
+ Below, you can explore how the negative binomial distribution compares to a Poisson distribution (which can be seen as a limiting case of the binomial as $n$ gets large and $p$ gets small, with $np$ held constant).
373
+
374
+ Adjust the sliders to see how the parameters affect the distribution:
375
+
376
+ *Note: The interactive visualization in this section was inspired by work from [liquidcarbon on GitHub](https://github.com/liquidcarbon).*
377
+ """
378
+ )
379
+ return
380
+
381
+
382
+ @app.cell(hide_code=True)
383
+ def _(alpha_slider, chart, equation, mo, mu_slider):
384
+ mo.vstack(
385
+ [
386
+ mo.md(f"## Negative Binomial Distribution (Poisson + Overdispersion)\n{equation}"),
387
+ mo.hstack([mu_slider, alpha_slider], justify="start"),
388
+ chart,
389
+ ], justify='space-around'
390
+ ).center()
391
+ return
392
+
393
+
394
+ @app.cell(hide_code=True)
395
+ def _(mo):
396
+ mo.md(
397
+ r"""
398
+ ## 🤔 Test Your Understanding
399
+ Pick which of these statements about binomial distributions you think are correct:
400
+
401
+ /// details | The variance of a binomial distribution is always equal to its mean
402
+ ❌ Incorrect! The variance is $np(1-p)$ while the mean is $np$. They're only equal when $p=0$ (a degenerate case in which both are zero).
403
+ ///
404
+
405
+ /// details | If $X \sim \text{Bin}(n, p)$ and $Y \sim \text{Bin}(n, 1-p)$, then $X$ and $Y$ have the same variance
406
+ ✅ Correct! $\text{Var}(X) = np(1-p)$ and $\text{Var}(Y) = n(1-p)p$, which are the same.
407
+ ///
408
+
409
+ /// details | As the number of trials increases, the binomial distribution approaches a normal distribution
410
+ ✅ Correct! For large $n$, the binomial distribution can be approximated by a normal distribution with the same mean and variance.
411
+ ///
412
+
413
+ /// details | The PMF of a binomial distribution is symmetric when $p = 0.5$
414
+ ✅ Correct! When $p = 0.5$, the PMF is symmetric around $n/2$.
415
+ ///
416
+
417
+ /// details | The sum of two independent binomial random variables with the same $p$ is also a binomial random variable
418
+ ✅ Correct! If $X \sim \text{Bin}(n_1, p)$ and $Y \sim \text{Bin}(n_2, p)$ are independent, then $X + Y \sim \text{Bin}(n_1 + n_2, p)$.
419
+ ///
420
+
421
+ /// details | The maximum value of the PMF for $\text{Bin}(n,p)$ always occurs at $k = np$
422
+ ❌ Incorrect! The mode (maximum value of PMF) is either $\lfloor (n+1)p \rfloor$ or $\lceil (n+1)p-1 \rceil$ depending on whether $(n+1)p$ is an integer.
423
+ ///
424
+ """
425
+ )
426
+ return
427
+
428
+
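As a numerical spot check of the last quiz item, the sketch below compares sample moments of a sum of two independent binomials against the $\text{Bin}(n_1+n_2, p)$ formulas (arbitrary parameters):

```python
import numpy as np

rng = np.random.default_rng(3)
n1, n2, p = 8, 5, 0.4                          # arbitrary example parameters
s = rng.binomial(n1, p, 500_000) + rng.binomial(n2, p, 500_000)
print(s.mean(), (n1 + n2) * p)                 # both ~5.2
print(s.var(), (n1 + n2) * p * (1 - p))        # both ~3.12
```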
429
+ @app.cell(hide_code=True)
430
+ def _(mo):
431
+ mo.md(
432
+ r"""
433
+ ## Summary
434
+
435
+ So we've explored the binomial distribution, and honestly, it's one of the most practical probability distributions you'll encounter. Think about it — anytime you're counting successes in a fixed number of trials (like those coin flips we discussed), this is your go-to distribution.
436
+
437
+ I find it fascinating how the expectation is simply $np$. Such a clean, intuitive formula! And remember that neat visualization we saw earlier? When we adjusted the parameters, you could actually see how the distribution shape changes—becoming more symmetric as $n$ increases.
438
+
439
+ The key things to take away:
440
+
441
+ - The binomial distribution models the number of successes in $n$ independent trials, each with probability $p$ of success
442
+
443
+ - Its PMF is given by the formula $P(X=k) = {n \choose k}p^k(1-p)^{n-k}$, which lets us calculate exactly how likely any specific number of successes is
444
+
445
+ - The expected value is $E[X] = np$ and the variance is $Var(X) = np(1-p)$
446
+
447
+ - It's related to other distributions: it's essentially a sum of Bernoulli random variables, and connects to both the negative binomial and Poisson distributions
448
+
449
+ - In Python, the `scipy.stats.binom` module makes working with binomial distributions straightforward—you can generate random samples and calculate probabilities with just a few lines of code
450
+
451
+ You'll see the binomial distribution pop up everywhere—from computer science to quality control, epidemiology, and data science. Any time you have scenarios with binary outcomes over multiple trials, this distribution has you covered.
452
+ """
453
+ )
454
+ return
455
+
456
+
457
+ @app.cell(hide_code=True)
458
+ def _(mo):
459
+ mo.md(r"""Appendix code (helper functions, variables, etc.):""")
460
+ return
461
+
462
+
463
+ @app.cell
464
+ def _():
465
+ import marimo as mo
466
+ return (mo,)
467
+
468
+
469
+ @app.cell(hide_code=True)
470
+ def _():
471
+ import numpy as np
472
+ import matplotlib.pyplot as plt
473
+ import scipy.stats as stats
474
+ import pandas as pd
475
+ import altair as alt
476
+ from wigglystuff import TangleSlider
477
+ return TangleSlider, alt, np, pd, plt, stats
478
+
479
+
480
+ @app.cell(hide_code=True)
481
+ def _(mo):
482
+ alpha_slider = mo.ui.slider(
483
+ value=0.1,
484
+ steps=[0, 0.01, 0.02, 0.03, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.8, 1],
485
+ label="α (overdispersion)",
486
+ show_value=True,
487
+ )
488
+ mu_slider = mo.ui.slider(
489
+ value=100, start=1, stop=100, step=1, label="μ (mean)", show_value=True
490
+ )
491
+ return alpha_slider, mu_slider
492
+
493
+
494
+ @app.cell(hide_code=True)
495
+ def _():
496
+ equation = """
497
+ $$
498
+ P(X = k) = \\frac{\\Gamma(k + \\frac{1}{\\alpha})}{\\Gamma(k + 1) \\Gamma(\\frac{1}{\\alpha})} \\left( \\frac{1}{\\mu \\alpha + 1} \\right)^{\\frac{1}{\\alpha}} \\left( \\frac{\\mu \\alpha}{\\mu \\alpha + 1} \\right)^k
499
+ $$
500
+
501
+ $$
502
+ \\sigma^2 = \\mu + \\alpha \\mu^2
503
+ $$
504
+ """
505
+ return (equation,)
506
+
507
+
508
+ @app.cell(hide_code=True)
509
+ def _(alpha_slider, alt, mu_slider, np, pd, stats):
510
+ mu = mu_slider.value
511
+ alpha = alpha_slider.value
512
+ n = 1000 - mu if alpha == 0 else 1 / alpha
513
+ p = n / (mu + n)
514
+ x = np.arange(0, mu * 3 + 1, 1)
515
+ df = pd.DataFrame(
516
+ {
517
+ "x": x,
518
+ "y": stats.nbinom.pmf(x, n, p),
519
+ "y_poi": stats.nbinom.pmf(x, 1000 - mu, 1 - mu / 1000),
520
+ }
521
+ )
522
+ r1k = stats.nbinom.rvs(n, p, size=1000)
523
+ df["in 95% CI"] = df["x"].between(*np.percentile(r1k, q=[2.5, 97.5]))
524
+ base = alt.Chart(df)
525
+
526
+ chart_poi = base.mark_bar(
527
+ fillOpacity=0.25, width=100 / mu, fill="magenta"
528
+ ).encode(
529
+ x=alt.X("x").scale(domain=(-0.4, x.max() + 0.4), nice=False),
530
+ y=alt.Y("y_poi").scale(domain=(0, df.y_poi.max() * 1.1)).title(None),
531
+ )
532
+ chart_nb = base.mark_bar(fillOpacity=0.75, width=100 / mu).encode(
533
+ x="x",
534
+ y="y",
535
+ fill=alt.Fill("in 95% CI")
536
+ .scale(domain=[False, True], range=["#aaa", "#7c7"])
537
+ .legend(orient="bottom-right"),
538
+ )
539
+
540
+ chart = (chart_poi + chart_nb).configure_view(continuousWidth=450)
541
+ return alpha, base, chart, chart_nb, chart_poi, df, mu, n, p, r1k, x
542
+
543
+
544
+ if __name__ == "__main__":
545
+ app.run()
probability/15_poisson_distribution.py ADDED
@@ -0,0 +1,805 @@
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "marimo",
5
+ # "matplotlib==3.10.0",
6
+ # "numpy==2.2.4",
7
+ # "scipy==1.15.2",
8
+ # "altair==5.2.0",
9
+ # "wigglystuff==0.1.10",
10
+ # "pandas==2.2.3",
11
+ # ]
12
+ # ///
13
+
14
+ import marimo
15
+
16
+ __generated_with = "0.11.25"
17
+ app = marimo.App(width="medium", app_title="Poisson Distribution")
18
+
19
+
20
+ @app.cell(hide_code=True)
21
+ def _(mo):
22
+ mo.md(
23
+ r"""
24
+ # Poisson Distribution
25
+
26
+ _This notebook is a computational companion to ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part2/poisson/), by Stanford professor Chris Piech._
27
+
28
+ A Poisson random variable gives the probability of a given number of events in a fixed interval of time (or space). It makes the Poisson assumption that events occur with a known constant mean rate and independently of the time since the last event.
29
+ """
30
+ )
31
+ return
32
+
33
+
34
+ @app.cell(hide_code=True)
35
+ def _(mo):
36
+ mo.md(
37
+ r"""
38
+ ## Poisson Random Variable Definition
39
+
40
+ $X \sim \text{Poisson}(\lambda)$ represents a Poisson random variable where:
41
+
42
+ - $X$ is our random variable (number of events)
43
+ - $\text{Poisson}$ indicates it follows a Poisson distribution
44
+ - $\lambda$ is the rate parameter (average number of events per time interval)
45
+
46
+ ```
47
+ X ~ Poisson(λ)
48
+ ↑ ↑ ↑
49
+ | | +-- Rate parameter:
50
+ | | average number of
51
+ | | events per interval
52
+ | +-- Indicates Poisson
53
+ | distribution
54
+ |
55
+ Our random variable
56
+ counting number of events
57
+ ```
58
+
59
+ The Poisson distribution is particularly useful when:
60
+
61
+ 1. Events occur independently of each other
62
+ 2. The average rate of occurrence is constant
63
+ 3. Two events cannot occur at exactly the same instant
64
+ 4. The probability of an event is proportional to the length of the time interval
65
+ """
66
+ )
67
+ return
68
+
69
+
70
+ @app.cell(hide_code=True)
71
+ def _(mo):
72
+ mo.md(
73
+ r"""
74
+ ## Properties of Poisson Distribution
75
+
76
+ | Property | Formula |
77
+ |----------|---------|
78
+ | Notation | $X \sim \text{Poisson}(\lambda)$ |
79
+ | Description | Number of events in a fixed time frame if (a) events occur with a constant mean rate and (b) they occur independently of time since last event |
80
+ | Parameters | $\lambda \in \mathbb{R}^{+}$, the constant average rate |
81
+ | Support | $x \in \{0, 1, \dots\}$ |
82
+ | PMF equation | $P(X=x) = \frac{\lambda^x e^{-\lambda}}{x!}$ |
83
+ | Expectation | $E[X] = \lambda$ |
84
+ | Variance | $\text{Var}(X) = \lambda$ |
85
+
86
+ Note that unlike many other distributions, the Poisson distribution's mean and variance are equal, both being $\lambda$.
87
+
88
+ Let's explore how the Poisson distribution changes with different rate parameters.
89
+ """
90
+ )
91
+ return
92
+
93
+
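The unusual property that the mean and variance are both $\lambda$ can be checked in one line with scipy; a minimal sketch with an arbitrary example rate:

```python
from scipy import stats

lam = 5.0                                               # arbitrary example rate
print(stats.poisson.mean(lam), stats.poisson.var(lam))  # 5.0 and 5.0
print(stats.poisson.pmf(3, lam))                        # P(X = 3) ~ 0.1404
```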
94
+ @app.cell(hide_code=True)
95
+ def _(TangleSlider, mo):
96
+ # interactive elements using TangleSlider
97
+ lambda_slider = mo.ui.anywidget(TangleSlider(
98
+ amount=5,
99
+ min_value=0.1,
100
+ max_value=20,
101
+ step=0.1,
102
+ digits=1,
103
+ suffix=" events"
104
+ ))
105
+
106
+ # interactive controls
107
+ _controls = mo.vstack([
108
+ mo.md("### Adjust the Rate Parameter to See How Poisson Distribution Changes"),
109
+ mo.hstack([
110
+ mo.md("**Rate parameter (λ):** "),
111
+ lambda_slider,
112
+ mo.md("**events per interval.** Higher values shift the distribution rightward and make it more spread out.")
113
+ ], justify="start"),
114
+ ])
115
+ _controls
116
+ return (lambda_slider,)
117
+
118
+
119
+ @app.cell(hide_code=True)
120
+ def _(lambda_slider, np, plt, stats):
121
+ def create_poisson_pmf_plot(lambda_value):
122
+ """Create a visualization of Poisson PMF with annotations for mean and variance."""
123
+ # PMF for values
124
+ max_x = max(20, int(lambda_value * 3)) # Show at least up to 3*lambda
125
+ x = np.arange(0, max_x + 1)
126
+ pmf = stats.poisson.pmf(x, lambda_value)
127
+
128
+ # Relevant key statistics
129
+ mean = lambda_value # For Poisson, mean = lambda
130
+ variance = lambda_value # For Poisson, variance = lambda
131
+ std_dev = np.sqrt(variance)
132
+
133
+ # plot
134
+ fig, ax = plt.subplots(figsize=(10, 6))
135
+
136
+ # PMF as bars
137
+ ax.bar(x, pmf, color='royalblue', alpha=0.7, label=f'PMF: P(X=k)')
138
+
139
+ # for the PMF values
140
+ ax.plot(x, pmf, 'ro-', alpha=0.6, label='PMF line')
141
+
142
+ # Vertical lines - mean and key values
143
+ ax.axvline(x=mean, color='green', linestyle='--', linewidth=2,
144
+ label=f'Mean: {mean:.2f}')
145
+
146
+ # Stdev region
147
+ ax.axvspan(mean - std_dev, mean + std_dev, alpha=0.2, color='green',
148
+ label=f'±1 Std Dev: {std_dev:.2f}')
149
+
150
+ ax.set_xlabel('Number of Events (k)')
151
+ ax.set_ylabel('Probability: P(X=k)')
152
+ ax.set_title(f'Poisson Distribution with λ={lambda_value:.1f}')
153
+
154
+ # annotations
155
+ ax.annotate(f'E[X] = {mean:.2f}',
156
+ xy=(mean, stats.poisson.pmf(int(mean), lambda_value)),
157
+ xytext=(mean + 1, max(pmf) * 0.8),
158
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
159
+
160
+ ax.annotate(f'Var(X) = {variance:.2f}',
161
+ xy=(mean, stats.poisson.pmf(int(mean), lambda_value) / 2),
162
+ xytext=(mean + 1, max(pmf) * 0.6),
163
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
164
+
165
+ ax.grid(alpha=0.3)
166
+ ax.legend()
167
+
168
+ plt.tight_layout()
169
+ return plt.gca()
170
+
171
+ # Get parameter from slider and create plot
172
+ _lambda = lambda_slider.amount
173
+ create_poisson_pmf_plot(_lambda)
174
+ return (create_poisson_pmf_plot,)
175
+
176
+
177
+ @app.cell(hide_code=True)
178
+ def _(mo):
179
+ mo.md(
180
+ r"""
181
+ ## Poisson Intuition: Relation to Binomial Distribution
182
+
183
+ The Poisson distribution can be derived as a limiting case of the [binomial distribution](http://marimo.app/https://github.com/marimo-team/learn/blob/main/probability/14_binomial_distribution.py).
184
+
185
+ Let's work on a practical example: predicting the number of ride-sharing requests in a specific area over a one-minute interval. From historical data, we know that the average number of requests per minute is $\lambda = 5$.
186
+
187
+ We could approximate this using a binomial distribution by dividing our minute into smaller intervals. For example, we can divide a minute into 60 seconds and treat each second as a [Bernoulli trial](http://marimo.app/https://github.com/marimo-team/learn/blob/main/probability/13_bernoulli_distribution.py) - either there's a request (success) or there isn't (failure).
188
+
189
+ Let's visualize this concept:
190
+ """
191
+ )
192
+ return
193
+
194
+
195
+ @app.cell(hide_code=True)
196
+ def _(fig_to_image, mo, plt):
197
+ def create_time_division_visualization():
198
+ # visualization of dividing a minute into 60 seconds
199
+ fig, ax = plt.subplots(figsize=(12, 2))
200
+
201
+ # Example events hardcoded at 2.75s and 7.12s
202
+ events = [2.75, 7.12]
203
+
204
+ # array of 60 rectangles
205
+ for i in range(60):
206
+ color = 'royalblue' if any(i <= e < i+1 for e in events) else 'lightgray'
207
+ ax.add_patch(plt.Rectangle((i, 0), 0.9, 1, color=color))
208
+
209
+ # markers for events
210
+ for e in events:
211
+ ax.plot(e, 0.5, 'ro', markersize=10)
212
+
213
+ # labels
214
+ ax.set_xlim(0, 60)
215
+ ax.set_ylim(0, 1)
216
+ ax.set_yticks([])
217
+ ax.set_xticks([0, 15, 30, 45, 60])
218
+ ax.set_xticklabels(['0s', '15s', '30s', '45s', '60s'])
219
+ ax.set_xlabel('Time (seconds)')
220
+ ax.set_title('One Minute Divided into 60 Second Intervals')
221
+
222
+ plt.tight_layout()
223
+ plt.gca()
224
+ return fig, events, i
225
+
226
+ # Create visualization and convert to image
227
+ _fig, _events, i = create_time_division_visualization()
228
+ _img = mo.image(fig_to_image(_fig), width="100%")
229
+
230
+ # explanation
231
+ _explanation = mo.md(
232
+ r"""
233
+ In this visualization:
234
+
235
+ - Each rectangle represents a 1-second interval
236
+ - Blue rectangles indicate intervals where an event occurred
237
+ - Red dots show the actual event times (2.75s and 7.12s)
238
+
239
+ If we treat this as a binomial experiment with 60 trials (seconds), we can calculate probabilities using the binomial PMF. But there's a problem: what if multiple events occur within the same second? To address this, we can divide our minute into smaller intervals.
240
+ """
241
+ )
242
+ mo.vstack([_fig, _explanation])
243
+ return create_time_division_visualization, i
244
+
245
+
246
+ @app.cell(hide_code=True)
247
+ def _(mo):
248
+ mo.md(
249
+ r"""
250
+ The total number of requests received over the minute can be approximated as the sum of the sixty indicator variables, which conveniently matches the description of a binomial — a sum of Bernoullis.
251
+
252
+ Specifically, if we define $X$ to be the number of requests in a minute, $X$ is a binomial with $n=60$ trials. What is the probability, $p$, of a success on a single trial? To make the expectation of $X$ equal the observed historical average $\lambda$, we should choose $p$ so that:
253
+
254
+ \begin{align}
255
+ \lambda &= E[X] && \text{Expectation matches historical average} \\
256
+ \lambda &= n \cdot p && \text{Expectation of a Binomial is } n \cdot p \\
257
+ p &= \frac{\lambda}{n} && \text{Solving for $p$}
258
+ \end{align}
259
+
260
+ In this case, since $\lambda=5$ and $n=60$, we should choose $p=\frac{5}{60}=\frac{1}{12}$ and state that $X \sim \text{Bin}(n=60, p=\frac{5}{60})$. Now we can calculate the probability of different numbers of requests using the binomial PMF:
261
+
262
+ $P(X = x) = {n \choose x} p^x (1-p)^{n-x}$
263
+
264
+ For example:
265
+
266
+ \begin{align}
267
+ P(X=1) &= {60 \choose 1} (5/60)^1 (55/60)^{60-1} \approx 0.0295 \\
268
+ P(X=2) &= {60 \choose 2} (5/60)^2 (55/60)^{60-2} \approx 0.0790 \\
269
+ P(X=3) &= {60 \choose 3} (5/60)^3 (55/60)^{60-3} \approx 0.1389
270
+ \end{align}
271
+
272
+ This is a good approximation, but it doesn't account for the possibility of multiple events in a single second. One solution is to divide our minute into even more fine-grained intervals. Let's try 600 deciseconds (tenths of a second):
273
+ """
274
+ )
275
+ return
276
+
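The three approximate probabilities quoted above follow directly from the binomial PMF with $n=60$ and $p=5/60$; a quick sketch to reproduce them:

```python
from scipy import stats

n, p = 60, 5 / 60
for k in (1, 2, 3):
    print(k, stats.binom.pmf(k, n, p))
# ~0.0295, ~0.0790, ~0.1389, matching the values above
```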
277
+
278
+ @app.cell(hide_code=True)
279
+ def _(fig_to_image, mo, plt):
280
+ def create_decisecond_visualization(e_value):
281
+ # (Just showing the first 100 for clarity)
282
+ fig, ax = plt.subplots(figsize=(12, 2))
283
+
284
+ # Example events at 2.75s and 7.12s (convert to deciseconds)
285
+ events = [27.5, 71.2]
286
+
287
+ for i in range(100):
288
+ color = 'royalblue' if any(i <= event_val < i + 1 for event_val in events) else 'lightgray'
289
+ ax.add_patch(plt.Rectangle((i, 0), 0.9, 1, color=color))
290
+
291
+ # Markers for events
292
+ for event in events:
293
+ if event < 100: # Only show events in our visible range
294
+ ax.plot(event/10, 0.5, 'ro', markersize=10) # Divide by 10 to convert to deciseconds
295
+
296
+ # Add labels
297
+ ax.set_xlim(0, 100)
298
+ ax.set_ylim(0, 1)
299
+ ax.set_yticks([])
300
+ ax.set_xticks([0, 20, 40, 60, 80, 100])
301
+ ax.set_xticklabels(['0s', '2s', '4s', '6s', '8s', '10s'])
302
+ ax.set_xlabel('Time (first 10 seconds shown)')
303
+ ax.set_title('One Minute Divided into 600 Decisecond Intervals (first 100 shown)')
304
+
305
+ plt.tight_layout()
306
+ plt.gca()
307
+ return fig
308
+
309
+ # Create viz and convert to image
310
+ _fig = create_decisecond_visualization(e_value=5)
311
+ _img = mo.image(fig_to_image(_fig), width="100%")
312
+
313
+ # Explanation
314
+ _explanation = mo.md(
315
+ r"""
316
+ With $n=600$ and $p=\frac{5}{600}=\frac{1}{120}$, we can recalculate our probabilities:
317
+
318
+ \begin{align}
319
+ P(X=1) &= {600 \choose 1} (5/600)^1 (595/600)^{600-1} \approx 0.0333 \\
320
+ P(X=2) &= {600 \choose 2} (5/600)^2 (595/600)^{600-2} \approx 0.0837 \\
321
+ P(X=3) &= {600 \choose 3} (5/600)^3 (595/600)^{600-3} \approx 0.1402
322
+ \end{align}
323
+
324
+ As we make our intervals smaller (increasing $n$), our approximation becomes more accurate.
325
+ """
326
+ )
327
+ mo.vstack([_fig, _explanation])
328
+ return (create_decisecond_visualization,)
329
+
330
+
331
+ @app.cell(hide_code=True)
332
+ def _(mo):
333
+ mo.md(
334
+ r"""
335
+ ## The Binomial Distribution in the Limit
336
+
337
+ What happens if we continue dividing our time interval into smaller and smaller pieces? Let's explore how the probabilities change as we increase the number of intervals:
338
+ """
339
+ )
340
+ return
341
+
342
+
343
+ @app.cell(hide_code=True)
344
+ def _(mo):
345
+ intervals_slider = mo.ui.slider(
346
+ start = 60,
347
+ stop = 10000,
348
+ step=100,
349
+ value=600,
350
+ label="Number of intervals to divide a minute")
351
+ return (intervals_slider,)
352
+
353
+
354
+ @app.cell(hide_code=True)
355
+ def _(intervals_slider):
356
+ intervals_slider
357
+ return
358
+
359
+
360
+ @app.cell(hide_code=True)
361
+ def _(intervals_slider, np, pd, plt, stats):
362
+ def create_comparison_plot(n, lambda_value):
363
+ # Calculate probability
364
+ p = lambda_value / n
365
+
366
+ # Binomial probabilities
367
+ x_values = np.arange(0, 15)
368
+ binom_pmf = stats.binom.pmf(x_values, n, p)
369
+
370
+ # True Poisson probabilities
371
+ poisson_pmf = stats.poisson.pmf(x_values, lambda_value)
372
+
373
+ # DF for comparison
374
+ df = pd.DataFrame({
375
+ 'Events': x_values,
376
+ f'Binomial(n={n}, p={p:.6f})': binom_pmf,
377
+ f'Poisson(λ=5)': poisson_pmf,
378
+ 'Difference': np.abs(binom_pmf - poisson_pmf)
379
+ })
380
+
381
+ # Plot both PMFs
382
+ fig, ax = plt.subplots(figsize=(10, 6))
383
+
384
+ # Bar plot for the binomial
385
+ ax.bar(x_values - 0.2, binom_pmf, width=0.4, alpha=0.7,
386
+ color='royalblue', label=f'Binomial(n={n}, p={p:.6f})')
387
+
388
+ # Bar plot for the Poisson
389
+ ax.bar(x_values + 0.2, poisson_pmf, width=0.4, alpha=0.7,
390
+ color='crimson', label='Poisson(λ=5)')
391
+
392
+ # Labels and title
393
+ ax.set_xlabel('Number of Events (k)')
394
+ ax.set_ylabel('Probability')
395
+ ax.set_title(f'Comparison of Binomial and Poisson PMFs with n={n}')
396
+ ax.legend()
397
+ ax.set_xticks(x_values)
398
+ ax.grid(alpha=0.3)
399
+
400
+ plt.tight_layout()
401
+ return df, fig, n, p
402
+
403
+ # Number of intervals from the slider
404
+ n = intervals_slider.value
405
+ _lambda = 5 # Fixed lambda for our example
406
+
407
+ # Comparison plot
408
+ df, fig, n, p = create_comparison_plot(n, _lambda)
409
+ return create_comparison_plot, df, fig, n, p
410
+
411
+
412
+ @app.cell(hide_code=True)
413
+ def _(df, fig, fig_to_image, mo, n, p):
414
+ # table of values
415
+ _styled_df = df.style.format({
416
+ f'Binomial(n={n}, p={p:.6f})': '{:.6f}',
417
+ f'Poisson(λ=5)': '{:.6f}',
418
+ 'Difference': '{:.6f}'
419
+ })
420
+
421
+ # Calculate the max absolute difference
422
+ _max_diff = df['Difference'].max()
423
+
424
+ # output
425
+ _chart = mo.image(fig_to_image(fig), width="100%")
426
+ _explanation = mo.md(f"**Maximum absolute difference between distributions: {_max_diff:.6f}**")
427
+ _table = mo.ui.table(df)
428
+
429
+ mo.vstack([_chart, _explanation, _table])
430
+ return
431
+
432
+
433
+ @app.cell(hide_code=True)
434
+ def _(mo):
435
+ mo.md(
436
+ r"""
437
+ As you can see from the interactive comparison above, as the number of intervals increases, the binomial distribution approaches the Poisson distribution! This is not a coincidence - the Poisson distribution is actually the limiting case of the binomial distribution when:
438
+
439
+ - The number of trials $n$ approaches infinity
440
+ - The probability of success $p$ approaches zero
441
+ - The product $np = \lambda$ remains constant
442
+
443
+ This relationship is why the Poisson distribution is so useful - it's easier to work with than a binomial with a very large number of trials and a very small probability of success.
444
+
445
+ ## Derivation of the Poisson PMF
446
+
447
+ Let's derive the Poisson PMF by taking the limit of the binomial PMF as $n \to \infty$. We start with:
448
+
449
+ $P(X=x) = \lim_{n \rightarrow \infty} {n \choose x} (\lambda / n)^x(1-\lambda/n)^{n-x}$
450
+
451
+ While this expression looks intimidating, it simplifies nicely:
452
+
453
+ \begin{align}
454
+ P(X=x)
455
+ &= \lim_{n \rightarrow \infty} {n \choose x} (\lambda / n)^x(1-\lambda/n)^{n-x}
456
+ && \text{Start: binomial in the limit}\\
457
+ &= \lim_{n \rightarrow \infty}
458
+ {n \choose x} \cdot
459
+ \frac{\lambda^x}{n^x} \cdot
460
+ \frac{(1-\lambda/n)^{n}}{(1-\lambda/n)^{x}}
461
+ && \text{Expanding the power terms} \\
462
+ &= \lim_{n \rightarrow \infty}
463
+ \frac{n!}{(n-x)!x!} \cdot
464
+ \frac{\lambda^x}{n^x} \cdot
465
+ \frac{(1-\lambda/n)^{n}}{(1-\lambda/n)^{x}}
466
+ && \text{Expanding the binomial term} \\
467
+ &= \lim_{n \rightarrow \infty}
468
+ \frac{n!}{(n-x)!x!} \cdot
469
+ \frac{\lambda^x}{n^x} \cdot
470
+ \frac{e^{-\lambda}}{(1-\lambda/n)^{x}}
471
+ && \text{Using limit rule } \lim_{n \rightarrow \infty}(1-\lambda/n)^{n} = e^{-\lambda}\\
472
+ &= \lim_{n \rightarrow \infty}
473
+ \frac{n!}{(n-x)!x!} \cdot
474
+ \frac{\lambda^x}{n^x} \cdot
475
+ \frac{e^{-\lambda}}{1}
476
+ && \text{As } n \to \infty \text{, } \lambda/n \to 0\\
477
+ &= \lim_{n \rightarrow \infty}
478
+ \frac{n!}{(n-x)!} \cdot
479
+ \frac{1}{x!} \cdot
480
+ \frac{\lambda^x}{n^x} \cdot
481
+ e^{-\lambda}
482
+ && \text{Rearranging terms}\\
483
+ &= \lim_{n \rightarrow \infty}
484
+ \frac{n^x}{1} \cdot
485
+ \frac{1}{x!} \cdot
486
+ \frac{\lambda^x}{n^x} \cdot
487
+ e^{-\lambda}
488
+ && \text{As } n \to \infty \text{, } \frac{n!}{(n-x)!} \approx n^x\\
489
+ &= \lim_{n \rightarrow \infty}
490
+ \frac{\lambda^x}{x!} \cdot
491
+ e^{-\lambda}
492
+ && \text{Canceling } n^x\\
493
+ &=
494
+ \frac{\lambda^x \cdot e^{-\lambda}}{x!}
495
+ && \text{Simplifying}\\
496
+ \end{align}
497
+
498
+ This gives us our elegant Poisson PMF formula: $P(X=x) = \frac{\lambda^x \cdot e^{-\lambda}}{x!}$
499
+ """
500
+ )
501
+ return
502
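A quick numerical cross-check of the derived formula (an editorial sketch, not part of the committed notebook; it only uses `scipy`, which the notebook already declares as a dependency):

```python
# Check P(X=x) = λ^x e^{-λ} / x! against scipy's Poisson PMF for λ = 5.
import math

import scipy.stats as stats

lam = 5
for x in range(4):
    by_formula = lam**x * math.exp(-lam) / math.factorial(x)
    by_scipy = stats.poisson.pmf(x, lam)
    print(f"P(X={x}): formula={by_formula:.5f}, scipy={by_scipy:.5f}")
```

The two columns agree, and match the P(X=1), P(X=2), P(X=3) values printed by the scipy example in the next section.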
+
503
+
504
+ @app.cell(hide_code=True)
505
+ def _(mo):
506
+ mo.md(
507
+ r"""
508
+ ## Poisson Distribution in Python
509
+
510
+ Python's `scipy.stats` module provides functions to work with the Poisson distribution. Let's see how to calculate probabilities and generate random samples.
511
+
512
+ First, let's calculate some probabilities for our ride-sharing example with $\lambda = 5$:
513
+ """
514
+ )
515
+ return
516
+
517
+
518
+ @app.cell
519
+ def _(stats):
520
+ _lambda = 5
521
+
522
+ # Calculate probabilities for X = 1, 2, 3
523
+ p_1 = stats.poisson.pmf(1, _lambda)
524
+ p_2 = stats.poisson.pmf(2, _lambda)
525
+ p_3 = stats.poisson.pmf(3, _lambda)
526
+
527
+ print(f"P(X=1) = {p_1:.5f}")
528
+ print(f"P(X=2) = {p_2:.5f}")
529
+ print(f"P(X=3) = {p_3:.5f}")
530
+
531
+ # Calculate cumulative probability P(X ≤ 3)
532
+ p_leq_3 = stats.poisson.cdf(3, _lambda)
533
+ print(f"P(X≤3) = {p_leq_3:.5f}")
534
+
535
+ # Calculate probability P(X > 10)
536
+ p_gt_10 = 1 - stats.poisson.cdf(10, _lambda)
537
+ print(f"P(X>10) = {p_gt_10:.5f}")
538
+ return p_1, p_2, p_3, p_gt_10, p_leq_3
539
+
540
+
541
+ @app.cell(hide_code=True)
542
+ def _(mo):
543
+ mo.md(r"""We can also generate random samples from a Poisson distribution and visualize their distribution:""")
544
+ return
545
+
546
+
547
+ @app.cell(hide_code=True)
548
+ def _(np, plt, stats):
549
+ def create_samples_plot(lambda_value, sample_size=1000):
550
+ # Random samples
551
+ samples = stats.poisson.rvs(lambda_value, size=sample_size)
552
+
553
+ # theoretical PMF
554
+ x_values = np.arange(0, max(samples) + 1)
555
+ pmf_values = stats.poisson.pmf(x_values, lambda_value)
556
+
557
+ # histograms to compare
558
+ fig, ax = plt.subplots(figsize=(10, 6))
559
+
560
+ # samples as a histogram
561
+ ax.hist(samples, bins=np.arange(-0.5, max(samples) + 1.5, 1),
562
+ alpha=0.7, density=True, label='Random Samples')
563
+
564
+ # theoretical PMF
565
+ ax.plot(x_values, pmf_values, 'ro-', label='Theoretical PMF')
566
+
567
+ # labels and title
568
+ ax.set_xlabel('Number of Events')
569
+ ax.set_ylabel('Relative Frequency / Probability')
570
+ ax.set_title(f'{sample_size} Random Samples from Poisson(λ={lambda_value})')
571
+ ax.legend()
572
+ ax.grid(alpha=0.3)
573
+
574
+ # annotations
575
+ ax.annotate(f'Sample Mean: {np.mean(samples):.2f}',
576
+ xy=(0.7, 0.9), xycoords='axes fraction',
577
+ bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.3))
578
+ ax.annotate(f'Theoretical Mean: {lambda_value:.2f}',
579
+ xy=(0.7, 0.8), xycoords='axes fraction',
580
+ bbox=dict(boxstyle='round,pad=0.5', fc='lightgreen', alpha=0.3))
581
+
582
+ plt.tight_layout()
583
+ return plt.gca()
584
+
585
+ # Use a lambda value of 5 for this example
586
+ _lambda = 5
587
+ create_samples_plot(_lambda)
588
+ return (create_samples_plot,)
589
+
590
+
591
+ @app.cell(hide_code=True)
592
+ def _(mo):
593
+ mo.md(
594
+ r"""
595
+ ## Changing Time Frames
596
+
597
+ One important property of the Poisson distribution is that the rate parameter $\lambda$ scales linearly with the time interval. If events occur at a rate of $\lambda$ per unit time, then over a period of $t$ units, the rate parameter becomes $\lambda \cdot t$.
598
+
599
+ For example, if a website receives an average of 5 requests per minute, what is the distribution of requests over a 20-minute period?
600
+
601
+ The rate parameter for the 20-minute period would be $\lambda = 5 \cdot 20 = 100$ requests.
602
+ """
603
+ )
604
+ return
605
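A minimal sketch of this scaling (an editorial addition, assuming only the same `scipy.stats` import used elsewhere in the notebook): the website example becomes Poisson with λ = 5 · 20 = 100 over the 20-minute window.

```python
# Requests arrive at 5 per minute; over 20 minutes the count is Poisson(λ = 5 * 20).
import scipy.stats as stats

rate_per_minute = 5
minutes = 20
lam = rate_per_minute * minutes  # 100

print(f"Expected requests in {minutes} minutes: {lam}")
print(f"P(X <= 90)  = {stats.poisson.cdf(90, lam):.4f}")
print(f"P(X >= 110) = {1 - stats.poisson.cdf(109, lam):.4f}")
```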
+
606
+
607
+ @app.cell(hide_code=True)
608
+ def _(mo):
609
+ rate_slider = mo.ui.slider(
610
+ start = 0.1,
611
+ stop = 10,
612
+ step=0.1,
613
+ value=5,
614
+ label="Rate per unit time (λ)"
615
+ )
616
+
617
+ time_slider = mo.ui.slider(
618
+ start = 1,
619
+ stop = 60,
620
+ step=1,
621
+ value=20,
622
+ label="Time period (t units)"
623
+ )
624
+
625
+ controls = mo.vstack([
626
+ mo.md("### Adjust Parameters to See How Time Scaling Works"),
627
+ mo.hstack([rate_slider, time_slider], justify="space-between")
628
+ ])
629
+ return controls, rate_slider, time_slider
630
+
631
+
632
+ @app.cell
633
+ def _(controls):
634
+ controls.center()
635
+ return
636
+
637
+
638
+ @app.cell(hide_code=True)
639
+ def _(mo, np, plt, rate_slider, stats, time_slider):
640
+ def create_time_scaling_plot(rate, time_period):
641
+ # scaled rate parameter
642
+ lambda_value = rate * time_period
643
+
644
+ # PMF for values
645
+ max_x = max(30, int(lambda_value * 1.5))
646
+ x = np.arange(0, max_x + 1)
647
+ pmf = stats.poisson.pmf(x, lambda_value)
648
+
649
+ # plot
650
+ fig, ax = plt.subplots(figsize=(10, 6))
651
+
652
+ # PMF as bars
653
+ ax.bar(x, pmf, color='royalblue', alpha=0.7,
654
+ label=f'PMF: Poisson(λ={lambda_value:.1f})')
655
+
656
+ # vertical line for mean
657
+ ax.axvline(x=lambda_value, color='red', linestyle='--', linewidth=2,
658
+ label=f'Mean = {lambda_value:.1f}')
659
+
660
+ # labels and title
661
+ ax.set_xlabel('Number of Events')
662
+ ax.set_ylabel('Probability')
663
+ ax.set_title(f'Poisson Distribution Over {time_period} Units (Rate = {rate}/unit)')
664
+
665
+ # better visualization if lambda is large
666
+ if lambda_value > 10:
667
+ ax.set_xlim(lambda_value - 4*np.sqrt(lambda_value),
668
+ lambda_value + 4*np.sqrt(lambda_value))
669
+
670
+ ax.legend()
671
+ ax.grid(alpha=0.3)
672
+
673
+ plt.tight_layout()
674
+
675
+ # Create relevant info markdown
676
+ info_text = f"""
677
+ When the rate is **{rate}** events per unit time and we observe for **{time_period}** units:
678
+
679
+ - The expected number of events is **{lambda_value:.1f}**
680
+ - The variance is also **{lambda_value:.1f}**
681
+ - The standard deviation is **{np.sqrt(lambda_value):.2f}**
682
+ - P(X=0) = {stats.poisson.pmf(0, lambda_value):.4f} (probability of no events)
683
+ - P(X≥10) = {1 - stats.poisson.cdf(9, lambda_value):.4f} (probability of 10 or more events)
684
+ """
685
+
686
+ return plt.gca(), info_text
687
+
688
+ # parameters from sliders
689
+ _rate = rate_slider.value
690
+ _time = time_slider.value
691
+
692
+ # store
693
+ _plot, _info_text = create_time_scaling_plot(_rate, _time)
694
+
695
+ # Display info as markdown
696
+ info = mo.md(_info_text)
697
+
698
+ mo.vstack([_plot, info], justify="center")
699
+ return create_time_scaling_plot, info
700
+
701
+
702
+ @app.cell(hide_code=True)
703
+ def _(mo):
704
+ mo.md(
705
+ r"""
706
+ ## 🤔 Test Your Understanding
707
+ Pick which of these statements about Poisson distributions you think are correct:
708
+
709
+ /// details | The variance of a Poisson distribution is always equal to its mean
710
+ ✅ Correct! For a Poisson distribution with parameter $\lambda$, both the mean and variance equal $\lambda$.
711
+ ///
712
+
713
+ /// details | The Poisson distribution can be used to model the number of successes in a fixed number of trials
714
+ ❌ Incorrect! That's the binomial distribution. The Poisson distribution models the number of events in a fixed interval of time or space, not a fixed number of trials.
715
+ ///
716
+
717
+ /// details | If $X \sim \text{Poisson}(\lambda_1)$ and $Y \sim \text{Poisson}(\lambda_2)$ are independent, then $X + Y \sim \text{Poisson}(\lambda_1 + \lambda_2)$
718
+ ✅ Correct! The sum of independent Poisson random variables is also a Poisson random variable with parameter equal to the sum of the individual parameters.
719
+ ///
720
+
721
+ /// details | As $\lambda$ increases, the Poisson distribution approaches a normal distribution
722
+ ✅ Correct! For large values of $\lambda$ (generally $\lambda > 10$), the Poisson distribution is approximately normal with mean $\lambda$ and variance $\lambda$.
723
+ ///
724
+
725
+ /// details | The probability of zero events in a Poisson process is always less than the probability of one event
726
+ ❌ Incorrect! For $\lambda < 1$, the probability of zero events ($e^{-\lambda}$) is actually greater than the probability of one event ($\lambda e^{-\lambda}$).
727
+ ///
728
+
729
+ /// details | The Poisson distribution has a single parameter $\lambda$, which always equals the average number of events per time period
730
+ ✅ Correct! The parameter $\lambda$ represents the average rate of events, and it uniquely defines the distribution.
731
+ ///
732
+ """
733
+ )
734
+ return
735
+
736
+
737
+ @app.cell(hide_code=True)
738
+ def _(mo):
739
+ mo.md(
740
+ r"""
741
+ ## Summary
742
+
743
+ The Poisson distribution is one of those incredibly useful tools that shows up all over the place. I've always found it fascinating how such a simple formula can model so many real-world phenomena - from website traffic to radioactive decay.
744
+
745
+ What makes the Poisson really cool is that it emerges naturally as we try to model rare events occurring over a continuous interval. Remember that visualization where we kept dividing time into smaller and smaller chunks? As we showed, when you take a binomial distribution and let the number of trials approach infinity while keeping the expected value constant, you end up with the elegant Poisson formula.
746
+
747
+ The key things to remember about the Poisson distribution:
748
+
749
+ - It models the number of events occurring in a fixed interval of time or space, assuming events happen at a constant average rate and independently of each other
750
+
751
+ - Its PMF is given by the elegantly simple formula $P(X=k) = \frac{\lambda^k e^{-\lambda}}{k!}$
752
+
753
+ - Both the mean and variance equal the parameter $\lambda$, which represents the average number of events per interval
754
+
755
+ - It's related to the binomial distribution as a limiting case when $n \to \infty$, $p \to 0$, and $np = \lambda$ remains constant
756
+
757
+ - The rate parameter scales linearly with the length of the interval - if events occur at rate $\lambda$ per unit time, then over $t$ units, the parameter becomes $\lambda t$
758
+
759
+ From modeling website traffic and customer arrivals to defects in manufacturing and radioactive decay, the Poisson distribution provides a powerful and mathematically elegant way to understand random occurrences in our world.
760
+ """
761
+ )
762
+ return
763
+
764
+
765
+ @app.cell(hide_code=True)
766
+ def _(mo):
767
+ mo.md(r"""Appendix code (helper functions, variables, etc.):""")
768
+ return
769
+
770
+
771
+ @app.cell
772
+ def _():
773
+ import marimo as mo
774
+ return (mo,)
775
+
776
+
777
+ @app.cell(hide_code=True)
778
+ def _():
779
+ import numpy as np
780
+ import matplotlib.pyplot as plt
781
+ import scipy.stats as stats
782
+ import pandas as pd
783
+ import altair as alt
784
+ from wigglystuff import TangleSlider
785
+ return TangleSlider, alt, np, pd, plt, stats
786
+
787
+
788
+ @app.cell(hide_code=True)
789
+ def _():
790
+ import io
791
+ import base64
792
+ from matplotlib.figure import Figure
793
+
794
+ # Helper function to convert mpl figure to an image format mo.image can handle
795
+ def fig_to_image(fig):
796
+ buf = io.BytesIO()
797
+ fig.savefig(buf, format='png')
798
+ buf.seek(0)
799
+ data = f"data:image/png;base64,{base64.b64encode(buf.read()).decode('utf-8')}"
800
+ return data
801
+ return Figure, base64, fig_to_image, io
802
+
803
+
804
+ if __name__ == "__main__":
805
+ app.run()
probability/16_continuous_distribution.py ADDED
@@ -0,0 +1,979 @@
1
+ # /// script
2
+ # requires-python = ">=3.11"
3
+ # dependencies = [
4
+ # "marimo",
5
+ # "altair==5.5.0",
6
+ # "matplotlib==3.10.1",
7
+ # "numpy==2.2.4",
8
+ # "scipy==1.15.2",
9
+ # "sympy==1.13.3",
10
+ # "wigglystuff==0.1.10",
11
+ # "polars==1.26.0",
12
+ # ]
13
+ # ///
14
+
15
+ import marimo
16
+
17
+ __generated_with = "0.11.26"
18
+ app = marimo.App(width="medium")
19
+
20
+
21
+ @app.cell(hide_code=True)
22
+ def _(mo):
23
+ mo.md(
24
+ r"""
25
+ # Continuous Distributions
26
+
27
+ _This notebook is a computational companion to ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part2/continuous/), by Stanford professor Chris Piech._
28
+
29
+ So far, all the random variables we've explored have been discrete, taking on only specific values (usually integers). Now we'll move into the world of **continuous random variables**, which can take on any real number value. Continuous random variables are used to model measurements with arbitrary precision like height, weight, time, and many natural phenomena.
30
+ """
31
+ )
32
+ return
33
+
34
+
35
+ @app.cell(hide_code=True)
36
+ def _(mo):
37
+ mo.md(
38
+ r"""
39
+ ## From Discrete to Continuous
40
+
41
+ To make the transition from discrete to continuous random variables, let's start with a thought experiment:
42
+
43
+ > Imagine you're running to catch a bus. You know you'll arrive at 2:15pm, but you don't know exactly when the bus will arrive. You want to model the bus arrival time (in minutes past 2pm) as a random variable $T$ so you can calculate the probability that you'll wait more than five minutes: $P(15 < T < 20)$.
44
+
45
+ This immediately highlights a key difference from discrete distributions. For discrete distributions, we described the probability that a random variable takes on exact values. But this doesn't make sense for continuous values like time.
46
+
47
+ For example:
48
+
49
+ - What's the probability the bus arrives at exactly 2:17pm and 12.12333911102389234 seconds?
50
+ - What's the probability of a child being born weighing exactly 3.523112342234 kilograms?
51
+
52
+ These questions don't have meaningful answers because real-world measurements can have infinite precision. The probability of a continuous random variable taking on any specific exact value is actually zero!
53
+
54
+ ### Visualizing the Transition
55
+
56
+ Let's visualize this transition from discrete to continuous:
57
+ """
58
+ )
59
+ return
60
+
61
+
62
+ @app.cell(hide_code=True)
63
+ def _(fig_to_image, mo, np, plt):
64
+ def create_discretization_plot():
65
+ fig, axs = plt.subplots(1, 3, figsize=(15, 5))
66
+
67
+ # values from 0 to 30 minutes
68
+ x = np.linspace(0, 30, 1000)
69
+
70
+ # Triangular distribution (peaked at 15 minutes)
71
+ y = np.where(x <= 15, x/15, (30-x)/15)
72
+ y = y / np.trapezoid(y, x) # Normalize
73
+
74
+ # 5-minute chunks (first plot)
75
+ bins = np.arange(0, 31, 5)
76
+ hist, _ = np.histogram(x, bins=bins, weights=y)
77
+ width = bins[1] - bins[0]
78
+ axs[0].bar(bins[:-1], hist * width, width=width, alpha=0.7,
79
+ color='royalblue', edgecolor='black')
80
+ axs[0].set_xlim(0, 30)
81
+ axs[0].set_title('5-Minute Intervals')
82
+ axs[0].set_xlabel('Minutes past 2pm')
83
+ axs[0].set_ylabel('Probability')
84
+
85
+ # 15-20 minute range more prominent
86
+ axs[0].bar([15], hist[3] * width, width=width, alpha=0.7,
87
+ color='darkorange', edgecolor='black')
88
+
89
+ # 2.5-minute chunks (second plot)
90
+ bins = np.arange(0, 31, 2.5)
91
+ hist, _ = np.histogram(x, bins=bins, weights=y)
92
+ width = bins[1] - bins[0]
93
+ axs[1].bar(bins[:-1], hist * width, width=width, alpha=0.7,
94
+ color='royalblue', edgecolor='black')
95
+ axs[1].set_xlim(0, 30)
96
+ axs[1].set_title('2.5-Minute Intervals')
97
+ axs[1].set_xlabel('Minutes past 2pm')
98
+
99
+ # Make 15-20 minute range more prominent
100
+ highlight_indices = [6, 7]
101
+ for idx in highlight_indices:
102
+ axs[1].bar([bins[idx]], hist[idx] * width, width=width, alpha=0.7,
103
+ color='darkorange', edgecolor='black')
104
+
105
+ # Continuous distribution (third plot)
106
+ axs[2].plot(x, y, 'royalblue', linewidth=2)
107
+ axs[2].set_xlim(0, 30)
108
+ axs[2].set_title('Continuous Distribution')
109
+ axs[2].set_xlabel('Minutes past 2pm')
110
+ axs[2].set_ylabel('Probability Density')
111
+
112
+ # Highlight the AUC between 15 and 20
113
+ mask = (x >= 15) & (x <= 20)
114
+ axs[2].fill_between(x[mask], y[mask], color='darkorange', alpha=0.7)
115
+
116
+ # Mark 15-20 minute interval
117
+ for ax in axs:
118
+ ax.axvline(x=15, color='red', linestyle='--', alpha=0.5)
119
+ ax.axvline(x=20, color='red', linestyle='--', alpha=0.5)
120
+ ax.set_xticks([0, 5, 10, 15, 20, 25, 30])
121
+ ax.grid(alpha=0.3)
122
+
123
+ plt.tight_layout()
124
+ plt.gca()
125
+ return fig
126
+
127
+ # Plot creation & conversion
128
+ _fig = create_discretization_plot()
129
+ _img = mo.image(fig_to_image(_fig), width="100%")
130
+
131
+ _explanation = mo.md(
132
+ r"""
133
+ The figure above illustrates our transition from discrete to continuous thinking:
134
+
135
+ - **Left**: Time divided into 5-minute chunks, where the probability of the bus arriving between 15-20 minutes (highlighted in orange) is a single value.
136
+ - **Center**: Time divided into finer 2.5-minute chunks, where the 15-20 minute range consists of two chunks.
137
+ - **Right**: In the limit, we get a continuous probability density function where the probability is the area under the curve between 15 and 20 minutes.
138
+
139
+ As we make our chunks smaller and smaller, we eventually arrive at a smooth function that gives us the probability density at each point.
140
+ """
141
+ )
142
+
143
+ mo.vstack([_img, _explanation])
144
+ return (create_discretization_plot,)
145
+
146
+
147
+ @app.cell(hide_code=True)
148
+ def _(mo):
149
+ mo.md(
150
+ r"""
151
+ ## Probability Density Functions
152
+
153
+ In the world of discrete random variables, we used **Probability Mass Functions (PMFs)** to describe the probability of a random variable taking on specific values. In the continuous world, we need a different approach.
154
+
155
+ For continuous random variables, we use a **Probability Density Function (PDF)** which defines the relative likelihood that a random variable takes on a particular value. We traditionally denote the PDF with the symbol $f$ and write it as:
156
+
157
+ $$f(X=x) \quad \text{or simply} \quad f(x)$$
158
+
159
+ Where the lowercase $x$ implies that we're talking about the relative likelihood of a continuous random variable which is the uppercase $X$.
160
+
161
+ ### Key Properties of PDFs
162
+
163
+ A **Probability Density Function (PDF)** $f(x)$ for a continuous random variable $X$ has these key properties:
164
+
165
+ 1. The probability that $X$ takes a value in the interval $[a, b]$ is:
166
+
167
+ $$P(a \leq X \leq b) = \int_a^b f(x) \, dx$$
168
+
169
+ 2. The PDF must be non-negative everywhere:
170
+
171
+ $$f(x) \geq 0 \text{ for all } x$$
172
+
173
+ 3. The total probability must sum to 1:
174
+
175
+ $$\int_{-\infty}^{\infty} f(x) \, dx = 1$$
176
+
177
+ 4. The probability that $X$ takes any specific exact value is 0:
178
+
179
+ $$P(X = a) = \int_a^a f(x) \, dx = 0$$
180
+
181
+ This last property highlights a key difference from discrete distributions: the probability of a continuous random variable taking on an exact value is always 0. Probabilities only make sense when talking about ranges of values.
182
+
183
+ ### Caution: Density ≠ Probability
184
+
185
+ A common misconception is to think of $f(x)$ as a probability. It is instead a **probability density**, representing probability per unit of $x$. The values of $f(x)$ can actually exceed 1, as long as the total area under the curve equals 1.
186
+
187
+ The interpretation of $f(x)$ is only meaningful when:
188
+
189
+ 1. We integrate over a range to get a probability, or
190
+ 2. We compare densities at different points to determine relative likelihoods.
191
+ """
192
+ )
193
+ return
194
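These properties can also be checked numerically. The sketch below is an editorial aside (it uses `scipy.integrate`, not anything defined in the notebook) and applies the four properties to the exponential density f(x) = 0.5·e^{-0.5x} that appears later in this notebook:

```python
# Numerically verify the PDF properties for f(x) = 0.5 * exp(-0.5 x) on [0, ∞).
import numpy as np
from scipy import integrate

def pdf(x):
    return 0.5 * np.exp(-0.5 * x)

total, _ = integrate.quad(pdf, 0, np.inf)   # property 3: total area is 1
p_1_3, _ = integrate.quad(pdf, 1, 3)        # property 1: P(1 <= X <= 3) is an area
p_exact, _ = integrate.quad(pdf, 2, 2)      # property 4: P(X = 2) = 0

print(f"total area     = {total:.4f}")
print(f"P(1 <= X <= 3) = {p_1_3:.4f}")
print(f"P(X = 2)       = {p_exact:.4f}")
```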
+
195
+
196
+ @app.cell(hide_code=True)
197
+ def _(TangleSlider, mo):
198
+ # Create sliders for a and b
199
+ a_slider = mo.ui.anywidget(TangleSlider(
200
+ amount=1,
201
+ min_value=0,
202
+ max_value=5,
203
+ step=0.1,
204
+ digits=1
205
+ ))
206
+
207
+ b_slider = mo.ui.anywidget(TangleSlider(
208
+ amount=3,
209
+ min_value=0,
210
+ max_value=5,
211
+ step=0.1,
212
+ digits=1
213
+ ))
214
+
215
+ # Distribution selector
216
+ distribution_radio = mo.ui.radio(
217
+ options=["uniform", "triangular", "exponential"],
218
+ value="uniform",
219
+ label="Distribution Type"
220
+ )
221
+
222
+ # Controls layout
223
+ _controls = mo.vstack([
224
+ mo.md("### Visualizing Probability as Area Under the PDF Curve"),
225
+ mo.md("Adjust sliders to change the interval $[a, b]$ and see how the probability changes:"),
226
+ mo.hstack([
227
+ mo.md("Lower bound (a):"),
228
+ a_slider,
229
+ mo.md("Upper bound (b):"),
230
+ b_slider
231
+ ], justify="start"),
232
+ distribution_radio
233
+ ])
234
+ _controls
235
+ return a_slider, b_slider, distribution_radio
236
+
237
+
238
+ @app.cell(hide_code=True)
239
+ def _(
240
+ a_slider,
241
+ b_slider,
242
+ create_pdf_visualization,
243
+ distribution_radio,
244
+ fig_to_image,
245
+ mo,
246
+ ):
247
+ a = a_slider.amount
248
+ b = b_slider.amount
249
+ distribution = distribution_radio.value
250
+
251
+ # Ensure a < b
252
+ if a > b:
253
+ a, b = b, a
254
+
255
+ # visualization
256
+ _fig, _probability = create_pdf_visualization(a, b, distribution)
257
+
258
+ # Display visualization
259
+ _img = mo.image(fig_to_image(_fig), width="100%")
260
+
261
+ # Add appropriate explanation
262
+ if distribution == "uniform":
263
+ _explanation = mo.md(
264
+ f"""
265
+ In the **uniform distribution**, all values between 0 and 5 are equally likely.
266
+ The probability density is constant at 0.2 (which is 1/5, ensuring the total area is 1).
267
+ For a uniform distribution, the probability that $X$ is in the interval $[{a:.1f}, {b:.1f}]$
268
+ is simply proportional to the width of the interval: $P({a:.1f} \leq X \leq {b:.1f}) = {_probability:.4f}$
269
+ Note that while the PDF has a constant value of 0.2, this is not a probability but a density!
270
+ """
271
+ )
272
+ elif distribution == "triangular":
273
+ _explanation = mo.md(
274
+ f"""
275
+ In this **triangular distribution**, the probability density increases linearly from 0 to 2.5,
276
+ then decreases linearly from 2.5 to 5.
277
+ The distribution's peak is at x = 2.5, where the value is highest.
278
+ The orange shaded area representing $P({a:.1f} \leq X \leq {b:.1f}) = {_probability:.4f}$
279
+ is calculated by integrating the PDF over the interval.
280
+ """
281
+ )
282
+ else:
283
+ _explanation = mo.md(
284
+ f"""
285
+ The **exponential distribution** (with λ = 0.5) models the time between events in a Poisson process.
286
+ Unlike the uniform and triangular distributions, the exponential distribution has infinite support
287
+ (extends from 0 to infinity). The probability density decreases exponentially as x increases.
288
+ The orange shaded area representing $P({a:.1f} \leq X \leq {b:.1f}) = {_probability:.4f}$
289
+ is calculated by integrating $f(x) = 0.5e^{{-0.5x}}$ over the interval.
290
+ """
291
+ )
292
+ mo.vstack([_img, _explanation])
293
+ return a, b, distribution
294
+
295
+
296
+ @app.cell(hide_code=True)
297
+ def _(mo):
298
+ mo.md(
299
+ r"""
300
+ ## Cumulative Distribution Function
301
+
302
+ Since working with PDFs requires solving integrals to find probabilities, we often use the **Cumulative Distribution Function (CDF)** as a more convenient tool.
303
+
304
+ The CDF $F(x)$ for a continuous random variable $X$ is defined as:
305
+
306
+ $$F(x) = P(X \leq x) = \int_{-\infty}^{x} f(t)\,dt$$
307
+
308
+ where $f(t)$ is the PDF of $X$.
309
+
310
+ ### Properties of CDFs
311
+
312
+ A CDF $F(x)$ has these key properties:
313
+
314
+ 1. $F(x)$ is always non-decreasing: if $a < b$, then $F(a) \leq F(b)$
315
+ 2. $\lim_{x \to -\infty} F(x) = 0$ and $\lim_{x \to \infty} F(x) = 1$
316
+ 3. $F(x)$ is right-continuous: $\lim_{h \to 0^+} F(x+h) = F(x)$
317
+
318
+ ### Using the CDF to Calculate Probabilities
319
+
320
+ The CDF is extremely useful because it allows us to calculate various probabilities without having to perform integrals each time:
321
+
322
+ | Probability Query | Solution | Explanation |
323
+ |-------------------|----------|-------------|
324
+ | $P(X < a)$ | $F(a)$ | Definition of the CDF |
325
+ | $P(X \leq a)$ | $F(a)$ | For continuous distributions, $P(X = a) = 0$ |
326
+ | $P(X > a)$ | $1 - F(a)$ | Since $P(X \leq a) + P(X > a) = 1$ |
327
+ | $P(a < X < b)$ | $F(b) - F(a)$ | Since $F(a) + P(a < X < b) = F(b)$ |
328
+ | $P(a \leq X \leq b)$ | $F(b) - F(a)$ | Since $P(X = a) = P(X = b) = 0$ |
329
+
330
+ For discrete random variables, the CDF is also defined but it's less commonly used:
331
+
332
+ $$F_X(a) = \sum_{i \leq a} P(X = i)$$
333
+
334
+ The CDF for discrete distributions is a step function, increasing at each point in the support of the random variable.
335
+ """
336
+ )
337
+ return
338
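A short sketch of the lookup table above, using scipy's exponential distribution (λ = 0.5 corresponds to `scale=2`). This is an editorial example rather than a cell from the notebook:

```python
# Read probabilities directly off the CDF instead of integrating the PDF each time.
import scipy.stats as stats

dist = stats.expon(scale=2)  # exponential with rate λ = 0.5
a, b = 1, 3

print(f"P(X < {a})       = {dist.cdf(a):.4f}")                # F(a)
print(f"P(X > {a})       = {1 - dist.cdf(a):.4f}")            # 1 - F(a)
print(f"P({a} < X < {b}) = {dist.cdf(b) - dist.cdf(a):.4f}")  # F(b) - F(a)
```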
+
339
+
340
+ @app.cell(hide_code=True)
341
+ def _(fig_to_image, mo, np, plt):
342
+ def create_pdf_cdf_comparison():
343
+ fig, axs = plt.subplots(3, 2, figsize=(12, 10))
344
+
345
+ # x-values
346
+ x = np.linspace(-1, 6, 1000)
347
+
348
+ # 1. Uniform Distribution
349
+ # PDF
350
+ pdf_uniform = np.where((x >= 0) & (x <= 5), 0.2, 0)
351
+ axs[0, 0].plot(x, pdf_uniform, 'b-', linewidth=2)
352
+ axs[0, 0].set_title('Uniform PDF')
353
+ axs[0, 0].set_ylabel('Density')
354
+ axs[0, 0].grid(alpha=0.3)
355
+
356
+ # CDF
357
+ cdf_uniform = np.zeros_like(x)
358
+ for i, val in enumerate(x):
359
+ if val < 0:
360
+ cdf_uniform[i] = 0
361
+ elif val > 5:
362
+ cdf_uniform[i] = 1
363
+ else:
364
+ cdf_uniform[i] = val / 5
365
+
366
+ axs[0, 1].plot(x, cdf_uniform, 'r-', linewidth=2)
367
+ axs[0, 1].set_title('Uniform CDF')
368
+ axs[0, 1].set_ylabel('Probability')
369
+ axs[0, 1].grid(alpha=0.3)
370
+
371
+ # 2. Triangular Distribution
372
+ # PDF
373
+ pdf_triangular = np.where(x <= 2.5, x/6.25, (5-x)/6.25)
374
+ pdf_triangular = np.where((x < 0) | (x > 5), 0, pdf_triangular)
375
+
376
+ axs[1, 0].plot(x, pdf_triangular, 'b-', linewidth=2)
377
+ axs[1, 0].set_title('Triangular PDF')
378
+ axs[1, 0].set_ylabel('Density')
379
+ axs[1, 0].grid(alpha=0.3)
380
+
381
+ # CDF
382
+ cdf_triangular = np.zeros_like(x)
383
+ for i, val in enumerate(x):
384
+ if val <= 0:
385
+ cdf_triangular[i] = 0
386
+ elif val >= 5:
387
+ cdf_triangular[i] = 1
388
+ else:
389
+ # For x ≤ 2.5: CDF = x²/(2 * 6.25)
390
+ # For x > 2.5: CDF = 1 - (5 - x)²/(2 * 6.25)
391
+ if val <= 2.5:
392
+ cdf_triangular[i] = (val**2) / (2 * 6.25)
393
+ else:
394
+ cdf_triangular[i] = 1 - ((5 - val)**2) / (2 * 6.25)
395
+
396
+ axs[1, 1].plot(x, cdf_triangular, 'r-', linewidth=2)
397
+ axs[1, 1].set_title('Triangular CDF')
398
+ axs[1, 1].set_ylabel('Probability')
399
+ axs[1, 1].grid(alpha=0.3)
400
+
401
+ # 3. Exponential Distribution
402
+ # PDF
403
+ lambda_param = 0.5
404
+ pdf_exponential = np.where(x >= 0, lambda_param * np.exp(-lambda_param * x), 0)
405
+
406
+ axs[2, 0].plot(x, pdf_exponential, 'b-', linewidth=2)
407
+ axs[2, 0].set_title('Exponential PDF (λ=0.5)')
408
+ axs[2, 0].set_xlabel('x')
409
+ axs[2, 0].set_ylabel('Density')
410
+ axs[2, 0].grid(alpha=0.3)
411
+
412
+ # CDF
413
+ cdf_exponential = np.where(x < 0, 0, 1 - np.exp(-lambda_param * x))
414
+
415
+ axs[2, 1].plot(x, cdf_exponential, 'r-', linewidth=2)
416
+ axs[2, 1].set_title('Exponential CDF (λ=0.5)')
417
+ axs[2, 1].set_xlabel('x')
418
+ axs[2, 1].set_ylabel('Probability')
419
+ axs[2, 1].grid(alpha=0.3)
420
+
421
+ # Common x-limits
422
+ for ax in axs.flatten():
423
+ ax.set_xlim(-0.5, 5.5)
424
+ if ax in axs[:, 0]: # PDF plots
425
+ ax.set_ylim(-0.05, max(0.5, max(pdf_triangular)*1.1))
426
+ else: # CDF plots
427
+ ax.set_ylim(-0.05, 1.05)
428
+
429
+ plt.tight_layout()
430
+ plt.gca()
431
+ return fig
432
+
433
+ # Create visualization
434
+ _fig = create_pdf_cdf_comparison()
435
+ _img = mo.image(fig_to_image(_fig), width="100%")
436
+
437
+ _explanation = mo.md(
438
+ r"""
439
+ The figure above compares the Probability Density Functions (PDFs) on the left with their corresponding Cumulative Distribution Functions (CDFs) on the right for three common distributions:
440
+
441
+ 1. **Uniform Distribution**:
442
+
443
+ - PDF: Constant value (0.2) across the support range [0, 5]
444
+ - CDF: Linear increase from 0 to 1 across the support range
445
+
446
+ 2. **Triangular Distribution**:
447
+
448
+ - PDF: Linearly increases then decreases, forming a triangle shape
449
+ - CDF: Increases quadratically up to the peak, then approaches 1 quadratically
450
+
451
+ 3. **Exponential Distribution**:
452
+
453
+ - PDF: Starts at λ=0.5 and decreases exponentially
454
+ - CDF: Starts at 0 and approaches 1 exponentially (never quite reaching 1)
455
+
456
+ /// NOTE
457
+ The common properties of all CDFs:
458
+
459
+ - They are non-decreasing functions
460
+ - They start at 0 (for x = -∞) and approach or reach 1 (for x = ∞)
461
+ - The slope of the CDF at any point equals the PDF value at that point
462
+ """
463
+ )
464
+
465
+ mo.vstack([_img, _explanation])
466
+ return (create_pdf_cdf_comparison,)
467
+
468
+
469
+ @app.cell(hide_code=True)
470
+ def _(mo):
471
+ mo.md(
472
+ r"""
473
+ ## Solving for Constants in PDFs
474
+
475
+ Many PDFs contain a constant that needs to be determined to ensure the total probability equals 1. Let's work through an example to understand how to solve for these constants.
476
+
477
+ ### Example: Finding the Constant $C$
478
+
479
+ Let $X$ be a continuous random variable with PDF:
480
+
481
+ $$f(x) = \begin{cases}
482
+ C(4x - 2x^2) & \text{when } 0 < x < 2 \\
483
+ 0 & \text{otherwise}
484
+ \end{cases}$$
485
+
486
+ In this function, $C$ is a constant we need to determine. Since we know the PDF must integrate to 1:
487
+
488
+ \begin{align}
489
+ &\int_0^2 C(4x - 2x^2) \, dx = 1 \\
490
+ &C\left(2x^2 - \frac{2x^3}{3}\right)\bigg|_0^2 = 1 \\
491
+ &C\left[\left(8 - \frac{16}{3}\right) - 0 \right] = 1 \\
492
+ &C\left(\frac{24 - 16}{3}\right) = 1 \\
493
+ &C\left(\frac{8}{3}\right) = 1 \\
494
+ &C = \frac{3}{8}
495
+ \end{align}
496
+
497
+ Now that we know $C = \frac{3}{8}$, we can compute probabilities. For example, what is $P(X > 1)$?
498
+
499
+ \begin{align}
500
+ P(X > 1)
501
+ &= \int_1^{\infty}f(x) \, dx \\
502
+ &= \int_1^2 \frac{3}{8}(4x - 2x^2) \, dx \\
503
+ &= \frac{3}{8}\left(2x^2 - \frac{2x^3}{3}\right)\bigg|_1^2 \\
504
+ &= \frac{3}{8}\left[\left(8 - \frac{16}{3}\right) - \left(2 - \frac{2}{3}\right)\right] \\
505
+ &= \frac{3}{8}\left[\left(8 - \frac{16}{3}\right) - \left(\frac{6 - 2}{3}\right)\right] \\
506
+ &= \frac{3}{8}\left[\left(\frac{24 - 16}{3}\right) - \left(\frac{4}{3}\right)\right] \\
507
+ &= \frac{3}{8}\left[\left(\frac{8}{3}\right) - \left(\frac{4}{3}\right)\right] \\
508
+ &= \frac{3}{8} \cdot \frac{4}{3} \\
509
+ &= \frac{1}{2}
510
+ \end{align}
511
+
512
+ Let's visualize this distribution and verify our results:
513
+ """
514
+ )
515
+ return
516
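The same derivation can be cross-checked symbolically. This is an editorial sketch along the lines of the `symbolic_calculation` helper in the notebook's appendix, except that it solves for C instead of assuming it:

```python
# Solve ∫₀² C(4x - 2x²) dx = 1 for C, then recompute P(X > 1).
import sympy

x, C = sympy.symbols('x C', positive=True)
pdf = C * (4*x - 2*x**2)

c_value = sympy.solve(sympy.Eq(sympy.integrate(pdf, (x, 0, 2)), 1), C)[0]
print(c_value)                                           # 3/8
print(sympy.integrate(pdf.subs(C, c_value), (x, 1, 2)))  # 1/2
```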
+
517
+
518
+ @app.cell(hide_code=True)
519
+ def _(
520
+ create_example_pdf_visualization,
521
+ fig_to_image,
522
+ mo,
523
+ symbolic_calculation,
524
+ ):
525
+ # Create visualization
526
+ _fig = create_example_pdf_visualization()
527
+ _img = mo.image(fig_to_image(_fig), width="100%")
528
+
529
+ # Symbolic calculation
530
+ _sympy_verification = mo.md(symbolic_calculation())
531
+
532
+ _explanation = mo.md(
533
+ r"""
534
+ The figure above shows:
535
+
536
+ 1. **Left**: The PDF $f(x) = \frac{3}{8}(4x - 2x^2)$ for $0 < x < 2$, with the area representing P(X > 1) shaded in orange.
537
+ 2. **Right**: The corresponding CDF, showing F(1) = 0.5 and thus P(X > 1) = 1 - F(1) = 0.5.
538
+
539
+ Notice how we:
540
+
541
+ 1. First determined the constant C = 3/8 by ensuring the total area under the PDF equals 1
542
+ 2. Used this value to calculate specific probabilities like P(X > 1)
543
+ 3. Verified our results both graphically and symbolically
544
+ """
545
+ )
546
+ mo.vstack([_img, _sympy_verification, _explanation])
547
+ return
548
+
549
+
550
+ @app.cell(hide_code=True)
551
+ def _(mo):
552
+ mo.md(
553
+ r"""
554
+ ## Expectation and Variance of Continuous Random Variables
555
+
556
+ Just as with discrete random variables, we can calculate the expectation and variance of continuous random variables. The main difference is that we use integrals instead of sums.
557
+
558
+ ### Expectation (Mean)
559
+
560
+ For a continuous random variable $X$ with PDF $f(x)$, the expectation is:
561
+
562
+ $$E[X] = \int_{-\infty}^{\infty} x \cdot f(x) \, dx$$
563
+
564
+ More generally, for any function $g(X)$:
565
+
566
+ $$E[g(X)] = \int_{-\infty}^{\infty} g(x) \cdot f(x) \, dx$$
567
+
568
+ ### Variance
569
+
570
+ The variance is defined the same way as for discrete random variables:
571
+
572
+ $$\text{Var}(X) = E[(X - \mu)^2] = E[X^2] - (E[X])^2$$
573
+
574
+ where $\mu = E[X]$ is the mean of $X$.
575
+
576
+ To calculate $E[X^2]$, we use:
577
+
578
+ $$E[X^2] = \int_{-\infty}^{\infty} x^2 \cdot f(x) \, dx$$
579
+
580
+ ### Properties
581
+
582
+ The following properties hold for both continuous and discrete random variables:
583
+
584
+ 1. $E[aX + b] = aE[X] + b$ for constants $a$ and $b$
585
+ 2. $\text{Var}(aX + b) = a^2 \text{Var}(X)$ for constants $a$ and $b$
586
+
587
+ Let's calculate the expectation and variance for our example PDF:
588
+ """
589
+ )
590
+ return
591
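Before the symbolic treatment in the next cell, here is a purely numerical cross-check (an editorial sketch using `scipy.integrate`, not part of the commit) for the example PDF f(x) = (3/8)(4x − 2x²) on (0, 2):

```python
# E[X] and Var(X) for f(x) = (3/8)(4x - 2x²) on (0, 2) via numerical integration.
from scipy import integrate

def pdf(x):
    return 3 / 8 * (4 * x - 2 * x**2)

e_x, _ = integrate.quad(lambda t: t * pdf(t), 0, 2)      # E[X]
e_x2, _ = integrate.quad(lambda t: t**2 * pdf(t), 0, 2)  # E[X²]
var_x = e_x2 - e_x**2

print(f"E[X]   = {e_x:.4f}")    # 1.0
print(f"Var(X) = {var_x:.4f}")  # 0.2
```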
+
592
+
593
+ @app.cell(hide_code=True)
594
+ def _(fig_to_image, mo, np, plt, sympy):
595
+ # Symbolic calculation of expectation and variance
596
+ def symbolic_stats_calc():
597
+ x = sympy.symbols('x')
598
+ C = sympy.Rational(3, 8)
599
+
600
+ # Define the PDF
601
+ pdf_expr = C * (4*x - 2*x**2)
602
+
603
+ # Calculate expectation
604
+ E_X = sympy.integrate(x * pdf_expr, (x, 0, 2))
605
+
606
+ # Calculate E[X²]
607
+ E_X2 = sympy.integrate(x**2 * pdf_expr, (x, 0, 2))
608
+
609
+ # Calculate variance
610
+ Var_X = E_X2 - E_X**2
611
+
612
+ # Calculate standard deviation
613
+ Std_X = sympy.sqrt(Var_X)
614
+
615
+ return E_X, E_X2, Var_X, Std_X
616
+
617
+ # Get symbolic results
618
+ E_X, E_X2, Var_X, Std_X = symbolic_stats_calc()
619
+
620
+ # Numerical values for plotting
621
+ E_X_val = float(E_X)
622
+ Var_X_val = float(Var_X)
623
+ Std_X_val = float(Std_X)
624
+
625
+ def create_expectation_variance_vis():
626
+ """Create visualization showing mean and variance for the example PDF."""
627
+ fig, ax = plt.subplots(figsize=(10, 6))
628
+
629
+ # x-values
630
+ x = np.linspace(-0.5, 2.5, 1000)
631
+
632
+ # PDF function
633
+ C = 3/8
634
+ pdf = np.where((x > 0) & (x < 2), C * (4*x - 2*x**2), 0)
635
+
636
+ # Plot the PDF
637
+ ax.plot(x, pdf, 'b-', linewidth=2, label='PDF')
638
+ ax.fill_between(x, pdf, where=(x > 0) & (x < 2), alpha=0.3, color='blue')
639
+
640
+ # Mark the mean
641
+ ax.axvline(x=E_X_val, color='r', linestyle='--', linewidth=2,
642
+ label=f'Mean (E[X] = {E_X_val:.3f})')
643
+
644
+ # Mark the standard deviation range
645
+ ax.axvspan(E_X_val - Std_X_val, E_X_val + Std_X_val, alpha=0.2, color='green',
646
+ label=f'±1 Std Dev ({Std_X_val:.3f})')
647
+
648
+ # Add labels and title
649
+ ax.set_xlabel('x')
650
+ ax.set_ylabel('Probability Density')
651
+ ax.set_title('PDF with Mean and Variance')
652
+ ax.legend()
653
+ ax.grid(alpha=0.3)
654
+
655
+ # Set x-limits
656
+ ax.set_xlim(-0.25, 2.25)
657
+
658
+ plt.tight_layout()
659
+ return fig
660
+
661
+ # Create the visualization
662
+ _fig = create_expectation_variance_vis()
663
+ _img = mo.image(fig_to_image(_fig), width="100%")
664
+
665
+ # Detailed calculations for our example
666
+ _calculations = mo.md(
667
+ f"""
668
+ ### Calculating Expectation and Variance for Our Example
669
+
670
+ Let's calculate the expectation and variance for the PDF:
671
+
672
+ $$f(x) = \\begin{{cases}}
673
+ \\frac{{3}}{{8}}(4x - 2x^2) & \\text{{when }} 0 < x < 2 \\\\
674
+ 0 & \\text{{otherwise}}
675
+ \\end{{cases}}$$
676
+
677
+ #### Expectation Calculation
678
+
679
+ $$E[X] = \\int_{{-\\infty}}^{{\\infty}} x \\cdot f(x) \\, dx = \\int_0^2 x \\cdot \\frac{{3}}{{8}}(4x - 2x^2) \\, dx$$
680
+
681
+ $$E[X] = \\frac{{3}}{{8}} \\int_0^2 (4x^2 - 2x^3) \\, dx = \\frac{{3}}{{8}} \\left[ \\frac{{4x^3}}{{3}} - \\frac{{2x^4}}{{4}} \\right]_0^2$$
682
+
683
+ $$E[X] = \\frac{{3}}{{8}} \\left[ \\frac{{4 \\cdot 2^3}}{{3}} - \\frac{{2 \\cdot 2^4}}{{4}} - 0 \\right] = \\frac{{3}}{{8}} \\left[ \\frac{{32}}{{3}} - 8 \\right]$$
684
+
685
+ $$E[X] = \\frac{{3}}{{8}} \\cdot \\frac{{32 - 24}}{{3}} = \\frac{{3}}{{8}} \\cdot \\frac{{8}}{{3}} = {E_X}$$
686
+
687
+ #### Variance Calculation
688
+
689
+ First, we need $E[X^2]$:
690
+
691
+ $$E[X^2] = \\int_{{-\\infty}}^{{\\infty}} x^2 \\cdot f(x) \\, dx = \\int_0^2 x^2 \\cdot \\frac{{3}}{{8}}(4x - 2x^2) \\, dx$$
692
+
693
+ $$E[X^2] = \\frac{{3}}{{8}} \\int_0^2 (4x^3 - 2x^4) \\, dx = \\frac{{3}}{{8}} \\left[ \\frac{{4x^4}}{{4}} - \\frac{{2x^5}}{{5}} \\right]_0^2$$
694
+
695
+ $$E[X^2] = \\frac{{3}}{{8}} \\left[ 16 - \\frac{{2 \\cdot 32}}{{5}} - 0 \\right] = \\frac{{3}}{{8}} \\left[ 16 - \\frac{{64}}{{5}} \\right]$$
696
+
697
+ $$E[X^2] = \\frac{{3}}{{8}} \\cdot \\frac{{80 - 64}}{{5}} = \\frac{{3}}{{8}} \\cdot \\frac{{16}}{{5}} = {E_X2}$$
698
+
699
+ Now we can calculate the variance:
700
+
701
+ $$\\text{{Var}}(X) = E[X^2] - (E[X])^2 = {E_X2} - ({E_X})^2 = {Var_X}$$
702
+
703
+ Therefore, the standard deviation is $\\sqrt{{\\text{{Var}}(X)}} = {Std_X}$.
704
+ """
705
+ )
706
+ mo.vstack([_img, _calculations])
707
+ return (
708
+ E_X,
709
+ E_X2,
710
+ E_X_val,
711
+ Std_X,
712
+ Std_X_val,
713
+ Var_X,
714
+ Var_X_val,
715
+ create_expectation_variance_vis,
716
+ symbolic_stats_calc,
717
+ )
718
+
719
+
720
+ @app.cell(hide_code=True)
721
+ def _(mo):
722
+ mo.md(
723
+ r"""
724
+ ## 🤔 Test Your Understanding
725
+
726
+ Select which of these statements about continuous distributions you think are correct:
727
+
728
+ /// details | The PDF of a continuous random variable can have values greater than 1
729
+ ✅ Correct! Since the PDF represents density (not probability), it can exceed 1 as long as the total area under the curve equals 1.
730
+ ///
731
+
732
+ /// details | For a continuous distribution, $P(X = a) > 0$ for any value $a$ in the support
733
+ ❌ Incorrect! For continuous random variables, the probability of the random variable taking any specific exact value is always 0. That is, $P(X = a) = 0$ for any value $a$.
734
+ ///
735
+
736
+ /// details | The area under a PDF curve between $a$ and $b$ equals the probability $P(a \leq X \leq b)$
737
+ ✅ Correct! The area under the PDF curve over an interval gives the probability that the random variable falls within that interval.
738
+ ///
739
+
740
+ /// details | The CDF function $F(x)$ is always equal to $\int_{-\infty}^{x} f(t) \, dt$
741
+ ✅ Correct! The CDF at point $x$ is the integral of the PDF from negative infinity to $x$.
742
+ ///
743
+
744
+ /// details | For a continuous random variable, $F(x)$ ranges from 0 to the maximum value in the support of the random variable
745
+ ❌ Incorrect! The CDF $F(x)$ ranges from 0 to 1, representing probabilities. It approaches 1 (not the maximum value in the support) as $x$ approaches infinity.
746
+ ///
747
+
748
+ /// details | To calculate the variance of a continuous random variable, we use the formula $\text{Var}(X) = E[X^2] - (E[X])^2$
749
+ ✅ Correct! This formula applies to both discrete and continuous random variables.
750
+ ///
751
+ """
752
+ )
753
+ return
754
+
755
+
756
+ @app.cell(hide_code=True)
757
+ def _(mo):
758
+ mo.md(
759
+ r"""
760
+ ## Summary
761
+
762
+ Moving from discrete to continuous thinking is a big conceptual leap, but it opens up powerful ways to model real-world phenomena.
763
+
764
+ In this notebook, we've seen how continuous random variables let us model quantities that can take any real value. Instead of dealing with probabilities at specific points (which are actually zero!), we work with probability density functions (PDFs) and find probabilities by calculating areas under curves.
765
+
766
+ Some key points to remember:
767
+
768
+ - PDFs give us relative likelihood, not actual probabilities - that's why they can exceed 1
769
+ - The probability between two points is the area under the PDF curve
770
+ - CDFs offer a convenient shortcut to find probabilities without integrating
771
+ - Expectation and variance work similarly to discrete variables, just with integrals instead of sums
772
+ - Constants in PDFs are determined by ensuring the total probability equals 1
773
+
774
+ This foundation will serve you well as we explore specific continuous distributions like normal, exponential, and beta in future notebooks. These distributions are the workhorses of probability theory and statistics, appearing everywhere from quality control to financial modeling.
775
+
776
+ One final thought: continuous distributions are beautiful mathematical objects, but remember they're just models. Real-world data is often discrete at some level, but continuous distributions provide elegant approximations that make calculations more tractable.
777
+ """
778
+ )
779
+ return
780
+
781
+
782
+ @app.cell
783
+ def _(mo):
784
+ mo.md(r"""Appendix code (helper functions, variables, etc.):""")
785
+ return
786
+
787
+
788
+ @app.cell
789
+ def _():
790
+ import marimo as mo
791
+ return (mo,)
792
+
793
+
794
+ @app.cell(hide_code=True)
795
+ def _():
796
+ import numpy as np
797
+ import matplotlib.pyplot as plt
798
+ import scipy.stats as stats
799
+ import sympy
800
+ from scipy import integrate
801
+ import polars as pl
802
+ import altair as alt
803
+ from wigglystuff import TangleSlider
804
+ return TangleSlider, alt, np, pl, plt, integrate, stats, sympy
805
+
806
+
807
+ @app.cell(hide_code=True)
808
+ def _():
809
+ import io
810
+ import base64
811
+ from matplotlib.figure import Figure
812
+
813
+ # Helper function to convert mpl figure to an image format mo.image can handle
814
+ def fig_to_image(fig):
815
+ buf = io.BytesIO()
816
+ fig.savefig(buf, format='png')
817
+ buf.seek(0)
818
+ data = f"data:image/png;base64,{base64.b64encode(buf.read()).decode('utf-8')}"
819
+ return data
820
+ return Figure, base64, fig_to_image, io
821
+
822
+
823
+ @app.cell(hide_code=True)
824
+ def _(np, plt):
825
+ def create_pdf_visualization(a, b, distribution='uniform'):
826
+ fig, ax = plt.subplots(figsize=(10, 6))
827
+
828
+ # x-values
829
+ x = np.linspace(-0.5, 5.5, 1000)
830
+
831
+ # Various PDFs to visualize
832
+ if distribution == 'uniform':
833
+ # Uniform distribution from 0 to 5
834
+ y = np.where((x >= 0) & (x <= 5), 0.2, 0)
835
+ title = f"Uniform PDF from 0 to 5"
836
+
837
+ elif distribution == 'triangular':
838
+ # Triangular distribution peaked at 2.5
839
+ y = np.where(x <= 2.5, x/6.25, (5-x)/6.25) # peak at 2.5
840
+ y = np.where((x < 0) | (x > 5), 0, y)
841
+ title = f"Triangular PDF from 0 to 5"
842
+
843
+ elif distribution == 'exponential':
844
+ lambda_param = 0.5
845
+ y = np.where(x >= 0, lambda_param * np.exp(-lambda_param * x), 0)
846
+ title = f"Exponential PDF with λ = {lambda_param}"
847
+
848
+ # Plot PDF
849
+ ax.plot(x, y, 'b-', linewidth=2, label='PDF $f(x)$')
850
+
851
+ # Shade the area for the probability P(a ≤ X ≤ b)
852
+ mask = (x >= a) & (x <= b)
853
+ ax.fill_between(x[mask], y[mask], color='orange', alpha=0.5)
854
+
855
+ # Calculate the probability
856
+ dx = x[1] - x[0]
857
+ probability = np.sum(y[mask]) * dx
858
+
859
+ # vertical lines at a and b
860
+ ax.axvline(x=a, color='r', linestyle='--', alpha=0.7,
861
+ label=f'a = {a:.1f}')
862
+ ax.axvline(x=b, color='g', linestyle='--', alpha=0.7,
863
+ label=f'b = {b:.1f}')
864
+
865
+ # horizontal line at y=0
866
+ ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
867
+
868
+ # labels and title
869
+ ax.set_xlabel('x')
870
+ ax.set_ylabel('Probability Density $f(x)$')
871
+ ax.set_title(title)
872
+ ax.legend(loc='upper right')
873
+
874
+ # relevant annotations
875
+ ax.annotate(rf'$P({a:.1f} \leq X \leq {b:.1f}) = {probability:.4f}$',
876
+ xy=(0.5, 0.9), xycoords='axes fraction',
877
+ bbox=dict(boxstyle='round,pad=0.5', facecolor='white', alpha=0.8),
878
+ horizontalalignment='center', fontsize=12)
879
+
880
+ plt.grid(alpha=0.3)
881
+ plt.tight_layout()
882
+ plt.gca()
883
+ return fig, probability
884
+ return (create_pdf_visualization,)
885
+
886
+
887
+ @app.cell(hide_code=True)
888
+ def _(np, plt, sympy):
889
+ def create_example_pdf_visualization():
890
+ fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
891
+
892
+ # x-values
893
+ x = np.linspace(-0.5, 2.5, 1000)
894
+
895
+ # PDF function
896
+ C = 3/8
897
+ pdf = np.where((x > 0) & (x < 2), C * (4*x - 2*x**2), 0)
898
+
899
+ # CDF
900
+ cdf = np.zeros_like(x)
901
+ for i, val in enumerate(x):
902
+ if val <= 0:
903
+ cdf[i] = 0
904
+ elif val >= 2:
905
+ cdf[i] = 1
906
+ else:
907
+ # Analytical form: C*(2x^2 - 2x^3/3)
908
+ cdf[i] = C * (2*val**2 - (2*val**3)/3)
909
+
910
+ # PDF Plot
911
+ ax1.plot(x, pdf, 'b-', linewidth=2)
912
+ ax1.set_title('PDF: $f(x) = \\frac{3}{8}(4x - 2x^2)$ for $0 < x < 2$')
913
+ ax1.set_xlabel('x')
914
+ ax1.set_ylabel('Probability Density')
915
+ ax1.grid(alpha=0.3)
916
+
917
+ # Highlight the area for P(X > 1)
918
+ mask = (x > 1) & (x < 2)
919
+ ax1.fill_between(x[mask], pdf[mask], color='orange', alpha=0.5,
920
+ label='P(X > 1) = 0.5')
921
+
922
+ # Add vertical line at x=1
923
+ ax1.axvline(x=1, color='r', linestyle='--', alpha=0.7)
924
+ ax1.legend()
925
+
926
+ # CDF Plot
927
+ ax2.plot(x, cdf, 'r-', linewidth=2)
928
+ ax2.set_title('CDF: $F(x)$ for the Example Distribution')
929
+ ax2.set_xlabel('x')
930
+ ax2.set_ylabel('Cumulative Probability')
931
+ ax2.grid(alpha=0.3)
932
+
933
+ # Mark appropriate (F(1) & F(2)) points)
934
+ ax2.plot(1, cdf[np.abs(x-1).argmin()], 'ro', markersize=8)
935
+ ax2.plot(2, cdf[np.abs(x-2).argmin()], 'ro', markersize=8)
936
+
937
+ # annotations
938
+ F_1 = C * (2*1**2 - (2*1**3)/3) # F(1)
939
+ ax2.annotate(f'F(1) = {F_1:.3f}', xy=(1, F_1), xytext=(1.1, 0.4),
940
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
941
+
942
+ ax2.annotate(f'F(2) = 1', xy=(2, 1), xytext=(1.7, 0.8),
943
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
944
+
945
+ ax2.annotate(f'P(X > 1) = 1 - F(1) = {1-F_1:.3f}', xy=(1.5, 0.7),
946
+ bbox=dict(boxstyle='round,pad=0.5', facecolor='orange', alpha=0.2))
947
+
948
+ # common x-limits
949
+ for ax in [ax1, ax2]:
950
+ ax.set_xlim(-0.25, 2.25)
951
+
952
+ plt.tight_layout()
953
+ plt.gca()
954
+ return fig
955
+
956
+ def symbolic_calculation():
957
+ x = sympy.symbols('x')
958
+ C = sympy.Rational(3, 8)
959
+
960
+ # PDF defn
961
+ pdf_expr = C * (4*x - 2*x**2)
962
+
963
+ # Verify PDF integrates to 1
964
+ total_prob = sympy.integrate(pdf_expr, (x, 0, 2))
965
+
966
+ # Calculate P(X > 1)
967
+ prob_gt_1 = sympy.integrate(pdf_expr, (x, 1, 2))
968
+
969
+ return f"""Symbolic calculation verification:
970
+
971
+ 1. Total probability: ∫₀² {C}(4x - 2x²) dx = {total_prob}
972
+ 2. P(X > 1): ∫₁² {C}(4x - 2x²) dx = {prob_gt_1}
973
+ """
974
+
975
+ return create_example_pdf_visualization, symbolic_calculation
976
+
977
+
978
+ if __name__ == "__main__":
979
+ app.run()
probability/17_normal_distribution.py ADDED
@@ -0,0 +1,1127 @@
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "marimo",
5
+ # "matplotlib==3.10.1",
6
+ # "scipy==1.15.2",
7
+ # "wigglystuff==0.1.10",
8
+ # "numpy==2.2.4",
9
+ # ]
10
+ # ///
11
+
12
+ import marimo
13
+
14
+ __generated_with = "0.11.26"
15
+ app = marimo.App(width="medium", app_title="Normal Distribution")
16
+
17
+
18
+ @app.cell(hide_code=True)
19
+ def _(mo):
20
+ mo.md(
21
+ r"""
22
+ # Normal Distribution
23
+
24
+ _This notebook is a computational companion to ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part2/normal/), by Stanford professor Chris Piech._
25
+
26
+ The Normal (also known as Gaussian) distribution is one of the most important probability distributions in statistics and data science. It's characterized by a symmetric bell-shaped curve and is fully defined by two parameters: mean (μ) and variance (σ²).
27
+ """
28
+ )
29
+ return
30
+
31
+
32
+ @app.cell(hide_code=True)
33
+ def _(mo):
34
+ mo.md(
35
+ r"""
36
+ ## Normal Random Variable Definition
37
+
38
+ The Normal (or Gaussian) random variable is denoted as:
39
+
40
+ $$X \sim \mathcal{N}(\mu, \sigma^2)$$
41
+
42
+ Where:
43
+
44
+ - $X$ is our random variable
45
+ - $\mathcal{N}$ indicates it follows a Normal distribution
46
+ - $\mu$ is the mean parameter
47
+ - $\sigma^2$ is the variance parameter (sometimes written as $\sigma$ for standard deviation)
48
+
49
+ ```
50
+ X ~ N(μ, σ²)
51
+ ↑ ↑ ↑ ↑
52
+ | | | +-- Variance (spread)
53
+ | | | of the distribution
54
+ | | +-- Mean (center)
55
+ | | of the distribution
56
+ | +-- Indicates Normal
57
+ | distribution
58
+ |
59
+ Our random variable
60
+ ```
61
+
62
+ The Normal distribution is particularly important for many reasons:
63
+
64
+ 1. It arises naturally from the sum of independent random variables (Central Limit Theorem)
65
+ 2. It appears frequently in natural phenomena
66
+ 3. It is the maximum entropy distribution given a fixed mean and variance
67
+ 4. It simplifies many mathematical calculations in statistics and probability
68
+ """
69
+ )
70
+ return
71
+
72
+
73
+ @app.cell(hide_code=True)
74
+ def _(mo):
75
+ mo.md(
76
+ r"""
77
+ ## Properties of Normal Distribution
78
+
79
+ | Property | Formula |
80
+ |----------|---------|
81
+ | Notation | $X \sim \mathcal{N}(\mu, \sigma^2)$ |
82
+ | Description | A common, naturally occurring distribution |
83
+ | Parameters | $\mu \in \mathbb{R}$, the mean<br>$\sigma^2 \in \mathbb{R}^+$, the variance |
84
+ | Support | $x \in \mathbb{R}$ |
85
+ | PDF equation | $f(x) = \frac{1}{\sigma\sqrt{2\pi}}e^{-\frac{1}{2}(\frac{x-\mu}{\sigma})^2}$ |
86
+ | CDF equation | $F(x) = \Phi(\frac{x-\mu}{\sigma})$ where $\Phi$ is the CDF of the standard normal |
87
+ | Expectation | $E[X] = \mu$ |
88
+ | Variance | $\text{Var}(X) = \sigma^2$ |
89
+
90
+ The PDF (Probability Density Function) reaches its maximum value at $x = \mu$, where the exponent becomes zero and $e^0 = 1$.
91
+ """
92
+ )
93
+ return
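As a quick numerical check of the PDF formula in the table above, here is a small sketch (added for illustration, not part of this diff) comparing the closed-form expression with `scipy.stats.norm.pdf`; it assumes only the NumPy/SciPy dependencies already pinned in the script header, and the parameter values are arbitrary:

```python
import numpy as np
from scipy import stats

mu, sigma = 3.0, 4.0
x = np.linspace(mu - 3 * sigma, mu + 3 * sigma, 5)

# PDF from the closed-form expression in the table above
pdf_manual = np.exp(-0.5 * ((x - mu) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))

# PDF from scipy; the arguments are (x, mean, standard deviation)
pdf_scipy = stats.norm.pdf(x, mu, sigma)

print(np.allclose(pdf_manual, pdf_scipy))  # True
print(x[pdf_manual.argmax()])              # 3.0 -- the density peaks at x = mu
```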
94
+
95
+
96
+ @app.cell(hide_code=True)
97
+ def _(mean_slider, mo, std_slider):
98
+ mo.md(
99
+ f"""
100
+ The figure below shows a comparison between:
101
+
102
+ - The **Standard Normal Distribution** (purple curve): N(0, 1)
103
+ - A **Normal Distribution** with the parameters you selected (blue curve)
104
+
105
+ Adjust the mean (μ) {mean_slider} and standard deviation (σ) {std_slider} below to see how the normal distribution changes shape.
106
+
107
+ """
108
+ )
109
+ return
110
+
111
+
112
+ @app.cell(hide_code=True)
113
+ def _(
114
+ create_distribution_comparison,
115
+ fig_to_image,
116
+ mean_slider,
117
+ mo,
118
+ std_slider,
119
+ ):
120
+ # values from the sliders
121
+ current_mu = mean_slider.amount
122
+ current_sigma = std_slider.amount
123
+
124
+ # Create plot
125
+ comparison_fig = create_distribution_comparison(current_mu, current_sigma)
126
+
127
+ # Call, convert and display
128
+ comp_image = mo.image(fig_to_image(comparison_fig), width="100%")
129
+ comp_image
130
+ return comp_image, comparison_fig, current_mu, current_sigma
131
+
132
+
133
+ @app.cell(hide_code=True)
134
+ def _(mean_slider, mo, std_slider):
135
+ mo.md(
136
+ f"""
137
+ ## Interactive Normal Distribution Visualization
138
+
139
+ The shape of a normal distribution is determined by two key parameters:
140
+
141
+ - The **mean (μ):** {mean_slider} controls the center of the distribution.
142
+
143
+ - The **standard deviation (σ):** {std_slider} controls the spread (width) of the distribution.
144
+
145
+ Try adjusting these parameters to see how they affect the shape of the distribution below:
146
+
147
+ """
148
+ )
149
+ return
150
+
151
+
152
+ @app.cell(hide_code=True)
153
+ def _(create_normal_pdf_plot, fig_to_image, mean_slider, mo, std_slider):
154
+ # value from widgets
155
+ _current_mu = mean_slider.amount
156
+ _current_sigma = std_slider.amount
157
+
158
+ # Create visualization
159
+ pdf_fig = create_normal_pdf_plot(_current_mu, _current_sigma)
160
+
161
+ # Display plot
162
+ pdf_image = mo.image(fig_to_image(pdf_fig), width="100%")
163
+
164
+ pdf_explanation = mo.md(
165
+ r"""
166
+ **Understanding the Normal Distribution Visualization:**
167
+
168
+ - **PDF (top)**: The probability density function shows the relative likelihood of different values.
169
+ The highest point occurs at the mean (μ).
170
+
171
+ - **Shaded regions**: The green shaded areas represent:
172
+ - μ ± 1σ: Contains approximately 68.3% of the probability
173
+ - μ ± 2σ: Contains approximately 95.5% of the probability
174
+ - μ ± 3σ: Contains approximately 99.7% of the probability (the "68-95-99.7 rule")
175
+
176
+ - **CDF (bottom)**: The cumulative distribution function shows the probability that X is less than or equal to a given value.
177
+ - At x = μ, the CDF equals 0.5 (50% probability)
178
+ - At x = μ + σ, the CDF equals approximately 0.84 (84% probability)
179
+ - At x = μ - σ, the CDF equals approximately 0.16 (16% probability)
180
+ """
181
+ )
182
+
183
+ mo.vstack([pdf_image, pdf_explanation])
184
+ return pdf_explanation, pdf_fig, pdf_image
185
+
186
+
187
+ @app.cell(hide_code=True)
188
+ def _(mo):
189
+ mo.md(
190
+ r"""
191
+ ## Standard Normal Distribution
192
+
193
+ The **Standard Normal Distribution** is a special case of the normal distribution where $\mu = 0$ and $\sigma = 1$. We denote it as:
194
+
195
+ $$Z \sim \mathcal{N}(0, 1)$$
196
+
197
+ This distribution is particularly important because:
198
+
199
+ 1. Any normal distribution can be transformed into the standard normal
200
+ 2. Statistical tables and calculations often use the standard normal as a reference
201
+
202
+ ### Standardizing a Normal Random Variable
203
+
204
+ For any normal random variable $X \sim \mathcal{N}(\mu, \sigma^2)$, we can transform it to the standard normal $Z$ using:
205
+
206
+ $$Z = \frac{X - \mu}{\sigma}$$
207
+
208
+ Let's see the mathematical derivation:
209
+
210
+ \begin{align*}
211
+ W &= \frac{X -\mu}{\sigma} && \text{Subtract by $\mu$ and divide by $\sigma$} \\
212
+ &= \frac{1}{\sigma}X - \frac{\mu}{\sigma} && \text{Use algebra to rewrite the equation}\\
213
+ &= aX + b && \text{Linear transform where $a = \frac{1}{\sigma}$, $b = -\frac{\mu}{\sigma}$}\\
214
+ &\sim \mathcal{N}(a\mu + b, a^2\sigma^2) && \text{The linear transform of a Normal is another Normal}\\
215
+ &\sim \mathcal{N}\left(\frac{\mu}{\sigma} - \frac{\mu}{\sigma}, \frac{\sigma^2}{\sigma^2}\right) && \text{Substitute values for $a$ and $b$}\\
216
+ &\sim \mathcal{N}(0, 1) && \text{The standard normal}
217
+ \end{align*}
218
+
219
+ This transformation is the foundation for many statistical tests and probability calculations.
220
+ """
221
+ )
222
+ return
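The standardization above can also be checked numerically. A minimal sketch (not part of the original notebook) comparing the CDF of X ~ N(μ, σ²) with Φ evaluated at the z-score, using the SciPy dependency already declared; the particular values of μ, σ, and x are arbitrary:

```python
from scipy import stats

mu, sigma, x = 3.0, 4.0, 0.0

direct = stats.norm.cdf(x, mu, sigma)            # P(X <= x) for X ~ N(mu, sigma^2)
standardized = stats.norm.cdf((x - mu) / sigma)  # Phi(z) with z = (x - mu) / sigma

print(direct, standardized)  # both ≈ 0.2266
```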
223
+
224
+
225
+ @app.cell(hide_code=True)
226
+ def _(create_standardization_plot, fig_to_image, mo):
227
+ # Create and display visualization
228
+ stand_fig = create_standardization_plot()
229
+
230
+ # Display
231
+ stand_image = mo.image(fig_to_image(stand_fig), width="100%")
232
+
233
+ stand_explanation = mo.md(
234
+ r"""
235
+ **Standardizing a Normal Distribution: A Two-Step Process**
236
+
237
+ The visualization above shows the process of transforming any normal distribution to the standard normal:
238
+
239
+ 1. **Shift the distribution** (left plot): First, we subtract the mean (μ) from X, centering the distribution at 0.
240
+
241
+ 2. **Scale the distribution** (right plot): Next, we divide by the standard deviation (σ), which adjusts the spread to match the standard normal.
242
+
243
+ The resulting standard normal distribution Z ~ N(0,1) has a mean of 0 and a variance of 1.
244
+
245
+ This transformation allows us to use standardized tables and calculations for any normal distribution.
246
+ """
247
+ )
248
+
249
+ mo.vstack([stand_image, stand_explanation])
250
+ return stand_explanation, stand_fig, stand_image
251
+
252
+
253
+ @app.cell(hide_code=True)
254
+ def _(mo):
255
+ mo.md(
256
+ r"""
257
+ ## Linear Transformations of Normal Variables
258
+
259
+ One useful property of the normal distribution is that linear transformations of normal random variables remain normal.
260
+
261
+ If $X \sim \mathcal{N}(\mu, \sigma^2)$ and $Y = aX + b$ (where $a$ and $b$ are constants), then:
262
+
263
+ $$Y \sim \mathcal{N}(a\mu + b, a^2\sigma^2)$$
264
+
265
+ This means:
266
+
267
+ - The mean is transformed by $a\mu + b$
268
+ - The variance is transformed by $a^2\sigma^2$
269
+
270
+ This property is extremely useful in statistics and probability calculations, as it allows us to easily determine the _distribution_ of transformed variables.
271
+ """
272
+ )
273
+ return
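A brief simulation sketch (added for illustration, not part of this diff; it uses only NumPy, which the notebook already depends on, and arbitrary constants) showing that the transformed mean and variance match aμ + b and a²σ²:

```python
import numpy as np

rng = np.random.default_rng(0)
mu, sigma = 2.0, 3.0
a, b = -1.5, 4.0

x = rng.normal(mu, sigma, size=1_000_000)  # X ~ N(2, 9)
y = a * x + b                              # Y = aX + b

print(y.mean(), a * mu + b)        # both ≈ 1.0
print(y.var(), a**2 * sigma**2)    # both ≈ 20.25
```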
274
+
275
+
276
+ @app.cell(hide_code=True)
277
+ def _(mo):
278
+ mo.md(
279
+ r"""
280
+ ## Calculating Probabilities with the Normal CDF
281
+
282
+ Unlike many other distributions, the normal distribution does not have a closed-form expression for its CDF. However, we can use the standard normal CDF (denoted as $\Phi$) to calculate probabilities.
283
+
284
+ For any normal random variable $X \sim \mathcal{N}(\mu, \sigma^2)$, the CDF is:
285
+
286
+ $$F_X(x) = P(X \leq x) = \Phi\left(\frac{x - \mu}{\sigma}\right)$$
287
+
288
+ Where $\Phi$ is the CDF of the standard normal distribution.
289
+
290
+ ### Derivation
291
+
292
+ \begin{align*}
293
+ F_X(x) &= P(X \leq x) \\
294
+ &= P\left(\frac{X - \mu}{\sigma} \leq \frac{x - \mu}{\sigma}\right) \\
295
+ &= P\left(Z \leq \frac{x - \mu}{\sigma}\right) \\
296
+ &= \Phi\left(\frac{x - \mu}{\sigma}\right)
297
+ \end{align*}
298
+
299
+ Let's look at some examples of calculating probabilities with normal distributions.
300
+ """
301
+ )
302
+ return
303
+
304
+
305
+ @app.cell(hide_code=True)
306
+ def _(mo):
307
+ mo.md("""## Examples of Normal Distributions""")
308
+ return
309
+
310
+
311
+ @app.cell(hide_code=True)
312
+ def _(create_probability_example, fig_to_image, mo):
313
+ # Create visualization
314
+ default_mu = 3
315
+ default_sigma = 4
316
+ default_query = 0
317
+
318
+ prob_fig, prob_value, ex_z_score = create_probability_example(default_mu, default_sigma, default_query)
319
+
320
+ # Display
321
+ prob_image = mo.image(fig_to_image(prob_fig), width="100%")
322
+
323
+ prob_explanation = mo.md(
324
+ f"""
325
+ **Example: Let X ~ N(3, 16), what is P(X > 0)?**
326
+
327
+ To solve this probability question:
328
+
329
+ 1. First, we standardize the query value:
330
+ Z = (x - μ) / σ = (0 - 3) / 4 = -0.75
331
+
332
+ 2. Then we calculate using the standard normal CDF:
333
+ P(X > 0) = P(Z > -0.75) = 1 - P(Z ≤ -0.75) = 1 - Φ(-0.75)
334
+
335
+ 3. Because the standard normal is symmetric:
336
+ 1 - Φ(-0.75) = Φ(0.75) = {prob_value:.3f}
337
+
338
+ The shaded orange area in the graph represents this probability of approximately {prob_value:.3f}.
339
+ """
340
+ )
341
+
342
+ mo.vstack([prob_image, prob_explanation])
343
+ return (
344
+ default_mu,
345
+ default_query,
346
+ default_sigma,
347
+ ex_z_score,
348
+ prob_explanation,
349
+ prob_fig,
350
+ prob_image,
351
+ prob_value,
352
+ )
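The worked example above can be reproduced in a couple of lines with SciPy (a sketch added here for illustration; it relies only on the `scipy` dependency already in the notebook):

```python
from scipy import stats

# X ~ N(3, 16), so the standard deviation is 4
p_direct = 1 - stats.norm.cdf(0, 3, 4)  # P(X > 0)
p_via_z = stats.norm.cdf(0.75)          # Phi(0.75) after standardizing

print(round(p_direct, 3), round(p_via_z, 3))  # 0.773 0.773
```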
353
+
354
+
355
+ @app.cell(hide_code=True)
356
+ def _(create_range_probability_example, fig_to_image, mo, stats):
357
+ # Create visualization
358
+ default_range_mu = 3
359
+ default_range_sigma = 4
360
+ default_range_lower = 2
361
+ default_range_upper = 5
362
+
363
+ range_fig, range_prob, range_z_lower, range_z_upper = create_range_probability_example(
364
+ default_range_mu, default_range_sigma, default_range_lower, default_range_upper)
365
+
366
+ # Display
367
+ range_image = mo.image(fig_to_image(range_fig), width="100%")
368
+
369
+ range_explanation = mo.md(
370
+ f"""
371
+ **Example: Let X ~ N(3, 16), what is P(2 < X < 5)?**
372
+
373
+ To solve this range probability question:
374
+
375
+ 1. First, we standardize both bounds:
376
+ Z_lower = (lower - μ) / σ = (2 - 3) / 4 = -0.25
377
+ Z_upper = (upper - μ) / σ = (5 - 3) / 4 = 0.5
378
+
379
+ 2. Then we calculate using the standard normal CDF:
380
+ P(2 < X < 5) = P(-0.25 < Z < 0.5)
381
+ = Φ(0.5) - Φ(-0.25)
382
+ = Φ(0.5) - (1 - Φ(0.25))
383
+ = Φ(0.5) + Φ(0.25) - 1
384
+
385
+ 3. Computing these values:
386
+ = {stats.norm.cdf(0.5):.3f} + {stats.norm.cdf(0.25):.3f} - 1
387
+ = {range_prob:.3f}
388
+
389
+ The shaded orange area in the graph represents this probability of approximately {range_prob:.3f}.
390
+ """
391
+ )
392
+
393
+ mo.vstack([range_image, range_explanation])
394
+ return (
395
+ default_range_lower,
396
+ default_range_mu,
397
+ default_range_sigma,
398
+ default_range_upper,
399
+ range_explanation,
400
+ range_fig,
401
+ range_image,
402
+ range_prob,
403
+ range_z_lower,
404
+ range_z_upper,
405
+ )
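Likewise, the range probability can be verified directly (a short sketch, not part of the diff, using the same SciPy dependency):

```python
from scipy import stats

# X ~ N(3, 16), so the standard deviation is 4
p_range = stats.norm.cdf(5, 3, 4) - stats.norm.cdf(2, 3, 4)  # P(2 < X < 5)
print(round(p_range, 3))  # ≈ 0.290
```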
406
+
407
+
408
+ @app.cell(hide_code=True)
409
+ def _(create_voltage_example_visualization, fig_to_image, mo):
410
+ # Create visualization
411
+ voltage_fig, voltage_error_prob = create_voltage_example_visualization()
412
+
413
+ # Display
414
+ voltage_image = mo.image(fig_to_image(voltage_fig), width="100%")
415
+
416
+ voltage_explanation = mo.md(
417
+ r"""
418
+ **Example: Signal Transmission with Noise**
419
+
420
+ In this example, we're sending digital signals over a wire:
421
+
422
+ - We send voltage 2 to represent a binary "1"
423
+ - We send voltage -2 to represent a binary "0"
424
+
425
+ The received signal R is the sum of the transmitted voltage (X) and random noise (Y):
426
+ R = X + Y, where Y ~ N(0, 1)
427
+
428
+ When decoding, we use a threshold of 0.5:
429
+
430
+ - If R ≥ 0.5, we interpret it as "1"
431
+ - If R < 0.5, we interpret it as "0"
432
+
433
+ Let's calculate the probability of error when sending a "1" (voltage = 2):
434
+
435
+ \begin{align*}
436
+ P(\text{Error when sending "1"}) &= P(X + Y < 0.5) \\
437
+ &= P(2 + Y < 0.5) \\
438
+ &= P(Y < -1.5) \\
439
+ &= \Phi(-1.5) \\
440
+ &\approx 0.067
441
+ \end{align*}
442
+
443
+ Therefore, the probability of incorrectly decoding a transmitted "1" as "0" is approximately 6.7%.
444
+
445
+ The orange shaded area in the plot represents this error probability.
446
+ """
447
+ )
448
+
449
+ mo.vstack([voltage_image, voltage_explanation])
450
+ return voltage_error_prob, voltage_explanation, voltage_fig, voltage_image
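The 6.7% error probability quoted above is just Φ(-1.5); here is a one-line check with SciPy (a sketch added for illustration, not part of the notebook code):

```python
from scipy import stats

# Sending "1": R = 2 + Y with Y ~ N(0, 1); decoding fails when R < 0.5, i.e. Y < -1.5
p_error = stats.norm.cdf(-1.5)
print(round(p_error, 3))  # ≈ 0.067
```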
451
+
452
+
453
+ @app.cell(hide_code=True)
454
+ def empirical_rule(mo):
455
+ mo.md(
456
+ r"""
457
+ ## The 68-95-99.7 Rule (Empirical Rule)
458
+
459
+ One of the most useful properties of the normal distribution is the "[68-95-99.7 rule](https://en.wikipedia.org/wiki/68-95-99.7_rule)," which states that:
460
+
461
+ - Approximately 68% of the data falls within 1 standard deviation of the mean
462
+ - Approximately 95% of the data falls within 2 standard deviations of the mean
463
+ - Approximately 99.7% of the data falls within 3 standard deviations of the mean
464
+
465
+ Let's verify this with a calculation for the 68% rule:
466
+
467
+ \begin{align}
468
+ P(\mu - \sigma < X < \mu + \sigma)
469
+ &= P(X < \mu + \sigma) - P(X < \mu - \sigma) \\
470
+ &= \Phi\left(\frac{(\mu + \sigma)-\mu}{\sigma}\right) - \Phi\left(\frac{(\mu - \sigma)-\mu}{\sigma}\right) \\
471
+ &= \Phi\left(\frac{\sigma}{\sigma}\right) - \Phi\left(\frac{-\sigma}{\sigma}\right) \\
472
+ &= \Phi(1) - \Phi(-1) \\
473
+ &\approx 0.8413 - 0.1587 \\
474
+ &\approx 0.6826 \approx 68.3\%
475
+ \end{align}
476
+
477
+ This calculation works for any normal distribution, regardless of the values of $\mu$ and $\sigma$!
478
+ """
479
+ )
480
+ return
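The same three percentages can be verified with a short loop over the standard normal CDF (a sketch for illustration, using the SciPy dependency already declared):

```python
from scipy import stats

for k in (1, 2, 3):
    p = stats.norm.cdf(k) - stats.norm.cdf(-k)  # P(mu - k*sigma < X < mu + k*sigma)
    print(k, round(p, 4))
# 1 0.6827
# 2 0.9545
# 3 0.9973
```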
481
+
482
+
483
+ @app.cell(hide_code=True)
484
+ def _(mo):
485
+ mo.md(r"""The Cumulative Distribution Function (CDF) gives the probability that a random variable is less than or equal to a specific value. Use the interactive calculator below to compute CDF values for a normal distribution.""")
486
+ return
487
+
488
+
489
+ @app.cell(hide_code=True)
490
+ def _(mo, mu_slider, sigma_slider, x_slider):
491
+ mo.md(
492
+ f"""
493
+ ## Interactive Normal CDF Calculator
494
+
495
+ Use the sliders below to explore different probability calculations:
496
+
497
+ **Query value (x):** {x_slider} — The value at which to evaluate F(x) = P(X ≤ x)
498
+
499
+ **Mean (μ):** {mu_slider} — The center of the distribution
500
+
501
+ **Standard deviation (σ):** {sigma_slider} — The spread of the distribution (larger σ means more spread)
502
+ """
503
+ )
504
+ return
505
+
506
+
507
+ @app.cell(hide_code=True)
508
+ def _(
509
+ create_cdf_calculator_plot,
510
+ fig_to_image,
511
+ mo,
512
+ mu_slider,
513
+ sigma_slider,
514
+ x_slider,
515
+ ):
516
+ # Values from widgets
517
+ calc_x = x_slider.amount
518
+ calc_mu = mu_slider.amount
519
+ calc_sigma = sigma_slider.amount
520
+
521
+ # Create visualization
522
+ calc_fig, cdf_value = create_cdf_calculator_plot(calc_x, calc_mu, calc_sigma)
523
+
524
+ # Standardized z-score
525
+ calc_z_score = (calc_x - calc_mu) / calc_sigma
526
+
527
+ # Display
528
+ calc_image = mo.image(fig_to_image(calc_fig), width="100%")
529
+
530
+ calc_result = mo.md(
531
+ f"""
532
+ ### Results:
533
+
534
+ For a Normal distribution with parameters μ = {calc_mu:.1f} and σ = {calc_sigma:.1f}:
535
+
536
+ - The value x = {calc_x:.1f} corresponds to a z-score of z = {calc_z_score:.3f}
537
+ - The CDF value F({calc_x:.1f}) = P(X ≤ {calc_x:.1f}) = {cdf_value:.3f}
538
+ - This means the probability that X is less than or equal to {calc_x:.1f} is {cdf_value*100:.1f}%
539
+
540
+ **Computing this in Python:**
541
+ ```python
542
+ from scipy import stats
543
+
544
+ # Using the one-line method
545
+ p = stats.norm.cdf({calc_x:.1f}, {calc_mu:.1f}, {calc_sigma:.1f})
546
+
547
+ # OR using the two-line method
548
+ X = stats.norm({calc_mu:.1f}, {calc_sigma:.1f})
549
+ p = X.cdf({calc_x:.1f})
550
+ ```
551
+
552
+ **Note:** In SciPy's `stats.norm`, the second parameter is the standard deviation (σ), not the variance (σ²).
553
+ """
554
+ )
555
+
556
+ mo.vstack([calc_image, calc_result])
557
+ return (
558
+ calc_fig,
559
+ calc_image,
560
+ calc_mu,
561
+ calc_result,
562
+ calc_sigma,
563
+ calc_x,
564
+ calc_z_score,
565
+ cdf_value,
566
+ )
567
+
568
+
569
+ @app.cell(hide_code=True)
570
+ def _(mo):
571
+ mo.md(
572
+ r"""
573
+ ## 🤔 Test Your Understanding
574
+
575
+ Test your knowledge with these true/false questions about normal distributions:
576
+
577
+ /// details | For a normal random variable X ~ N(μ, σ²), the probability that X takes on exactly the value μ is highest among all possible values.
578
+
579
+ **✅ True**
580
+
581
+ While the PDF is indeed highest at x = μ, making this the most likely value in terms of density, remember that for continuous random variables, the probability of any exact value is zero. The statement refers to the density function being maximized at the mean.
582
+ ///
583
+
584
+ /// details | The probability that a normal random variable X equals any specific exact value (e.g., P(X = 3)) is always zero.
585
+
586
+ **✅ True**
587
+
588
+ For continuous random variables including the normal, the probability of any exact value is zero. Probabilities only make sense for ranges of values, which is why we integrate the PDF over intervals.
589
+ ///
590
+
591
+ /// details | If X ~ N(μ, σ²), then aX + b ~ N(aμ + b, a²σ²) for any constants a and b.
592
+
593
+ **✅ True**
594
+
595
+ Linear transformations of normal random variables remain normal, with the given transformation of the parameters. This is a key property that makes normal distributions particularly useful.
596
+ ///
597
+
598
+ /// details | If X ~ N(5, 9) and Y ~ N(3, 4) are independent, then X + Y ~ N(8, 5).
599
+
600
+ **❌ False**
601
+
602
+ While the mean of the sum is indeed the sum of the means (5 + 3 = 8), the variance of the sum is the sum of the variances (9 + 4 = 13), not 5. The correct distribution would be X + Y ~ N(8, 13).
603
+ ///
604
+ """
605
+ )
606
+ return
607
+
608
+
609
+ @app.cell(hide_code=True)
610
+ def _(mo):
611
+ mo.md(
612
+ r"""
613
+ ## Summary
614
+
615
+ We've taken a tour of the Normal distribution, probably the most famous probability distribution you'll encounter in statistics. It's that familiar bell-shaped curve that shows up everywhere, from heights and weights to measurement errors and stock returns.
616
+
617
+ The Normal distribution isn't just pretty — it's incredibly practical. With just two parameters (mean and standard deviation), you can describe complex phenomena and make powerful predictions. Plus, thanks to the Central Limit Theorem, many random processes naturally converge to this distribution, which is why it's so prevalent.
618
+
619
+ **What we covered:**
620
+
621
+ - The mathematical definition and key properties of Normal random variables
622
+
623
+ - How to transform any Normal distribution to the standard Normal
624
+
625
+ - Calculating probabilities using the CDF (no more looking up values in those tiny tables in the back of textbooks or Clark's table!)
626
+
627
+ Whether you're analyzing data, designing experiments, or building ML models, the concepts we explored provide a solid foundation for working with this fundamental distribution.
628
+ """
629
+ )
630
+ return
631
+
632
+
633
+ @app.cell(hide_code=True)
634
+ def _(mo):
635
+ mo.md(r"""Appendix (helper code and functions)""")
636
+ return
637
+
638
+
639
+ @app.cell
640
+ def _():
641
+ import marimo as mo
642
+ return (mo,)
643
+
644
+
645
+ @app.cell(hide_code=True)
646
+ def _():
647
+ from wigglystuff import TangleSlider
648
+ return (TangleSlider,)
649
+
650
+
651
+ @app.cell(hide_code=True)
652
+ def _(np, plt, stats):
653
+ def create_normal_pdf_plot(mu, sigma):
654
+
655
+ # Range for x values (show μ ± 4σ)
656
+ x = np.linspace(mu - 4*sigma, mu + 4*sigma, 1000)
657
+ pdf = stats.norm.pdf(x, mu, sigma)
658
+
659
+ # Calculate CDF values
660
+ cdf = stats.norm.cdf(x, mu, sigma)
661
+
662
+ # Create plot with two subplots for (PDF and CDF)
663
+ pdf_fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
664
+
665
+ # PDF plot
666
+ ax1.plot(x, pdf, color='royalblue', linewidth=2, label='PDF')
667
+ ax1.fill_between(x, pdf, color='royalblue', alpha=0.2)
668
+
669
+ # Vertical line at mean
670
+ ax1.axvline(x=mu, color='red', linestyle='--', linewidth=1.5,
671
+ label=f'Mean: μ = {mu:.1f}')
672
+
673
+ # Stdev regions
674
+ for i in range(1, 4):
675
+ alpha = 0.1 if i > 1 else 0.2
676
+ percentage = 100*stats.norm.cdf(i) - 100*stats.norm.cdf(-i)
677
+ label = f'μ ± {i}σ: {percentage:.1f}%' if i == 1 else None
678
+ ax1.axvspan(mu - i*sigma, mu + i*sigma, alpha=alpha, color='green',
679
+ label=label)
680
+
681
+ # Annotations
682
+ ax1.annotate(f'μ = {mu:.1f}', xy=(mu, max(pdf)*0.15), xytext=(mu+0.5*sigma, max(pdf)*0.4),
683
+ arrowprops=dict(facecolor='black', width=1, shrink=0.05))
684
+
685
+ ax1.annotate(f'σ = {sigma:.1f}',
686
+ xy=(mu+sigma, stats.norm.pdf(mu+sigma, mu, sigma)),
687
+ xytext=(mu+1.5*sigma, stats.norm.pdf(mu+sigma, mu, sigma)*1.5),
688
+ arrowprops=dict(facecolor='black', width=1, shrink=0.05))
689
+
690
+ # some styling
691
+ ax1.set_title(f'Normal Distribution PDF: N({mu:.1f}, {sigma:.1f}²)')
692
+ ax1.set_xlabel('x')
693
+ ax1.set_ylabel('Probability Density: f(x)')
694
+ ax1.legend(loc='upper right')
695
+ ax1.grid(alpha=0.3)
696
+
697
+ # CDF plot
698
+ ax2.plot(x, cdf, color='darkorange', linewidth=2, label='CDF')
699
+
700
+ # key CDF values mark
701
+ key_points = [
702
+ (mu-sigma, stats.norm.cdf(mu-sigma, mu, sigma), "16%"),
703
+ (mu, 0.5, "50%"),
704
+ (mu+sigma, stats.norm.cdf(mu+sigma, mu, sigma), "84%")
705
+ ]
706
+
707
+ for point, value, label in key_points:
708
+ ax2.plot(point, value, 'ro')
709
+ ax2.annotate(f'{label}',
710
+ xy=(point, value),
711
+ xytext=(point+0.2*sigma, value-0.1),
712
+ arrowprops=dict(facecolor='black', width=1, shrink=0.05))
713
+
714
+ # CDF styling
715
+ ax2.set_title(f'Normal Distribution CDF: N({mu:.1f}, {sigma:.1f}²)')
716
+ ax2.set_xlabel('x')
717
+ ax2.set_ylabel('Cumulative Probability: F(x)')
718
+ ax2.grid(alpha=0.3)
719
+
720
+ plt.tight_layout()
721
+ return pdf_fig
722
+ return (create_normal_pdf_plot,)
723
+
724
+
725
+ @app.cell(hide_code=True)
726
+ def _(base64, io):
727
+ from matplotlib.figure import Figure
728
+
729
+ # convert matplotlib figures to images (helper code)
730
+ def fig_to_image(fig):
731
+ buf = io.BytesIO()
732
+ fig.savefig(buf, format='png', bbox_inches='tight')
733
+ buf.seek(0)
734
+ img_str = base64.b64encode(buf.getvalue()).decode('utf-8')
735
+ return f"data:image/png;base64,{img_str}"
736
+ return Figure, fig_to_image
737
+
738
+
739
+ @app.cell(hide_code=True)
740
+ def _():
741
+ # Import libraries
742
+ import numpy as np
743
+ import matplotlib.pyplot as plt
744
+ from scipy import stats
745
+ import io
746
+ import base64
747
+ return base64, io, np, plt, stats
748
+
749
+
750
+ @app.cell(hide_code=True)
751
+ def _(TangleSlider, mo):
752
+ mean_slider = mo.ui.anywidget(TangleSlider(
753
+ amount=0,
754
+ min_value=-5,
755
+ max_value=5,
756
+ step=0.1,
757
+ digits=1
758
+ ))
759
+
760
+ std_slider = mo.ui.anywidget(TangleSlider(
761
+ amount=1,
762
+ min_value=0.1,
763
+ max_value=3,
764
+ step=0.1,
765
+ digits=1
766
+ ))
767
+ return mean_slider, std_slider
768
+
769
+
770
+ @app.cell(hide_code=True)
771
+ def _(TangleSlider, mo):
772
+ x_slider = mo.ui.anywidget(TangleSlider(
773
+ amount=0,
774
+ min_value=-5,
775
+ max_value=5,
776
+ step=0.1,
777
+ digits=1
778
+ ))
779
+
780
+ mu_slider = mo.ui.anywidget(TangleSlider(
781
+ amount=0,
782
+ min_value=-5,
783
+ max_value=5,
784
+ step=0.1,
785
+ digits=1
786
+ ))
787
+
788
+ sigma_slider = mo.ui.anywidget(TangleSlider(
789
+ amount=1,
790
+ min_value=0.1,
791
+ max_value=3,
792
+ step=0.1,
793
+ digits=1
794
+ ))
795
+ return mu_slider, sigma_slider, x_slider
796
+
797
+
798
+ @app.cell(hide_code=True)
799
+ def _(np, plt, stats):
800
+ def create_distribution_comparison(mu=5, sigma=6):
801
+
802
+ # Create figure and axis
803
+ comparison_fig, ax = plt.subplots(figsize=(10, 6))
804
+
805
+ # X range for plotting
806
+ x = np.linspace(-10, 20, 1000)
807
+
808
+ # Standard normal
809
+ std_normal = stats.norm.pdf(x, 0, 1)
810
+
811
+ # Our example normal
812
+ example_normal = stats.norm.pdf(x, mu, sigma)
813
+
814
+ # Plot both distributions
815
+ ax.plot(x, std_normal, 'darkviolet', linewidth=2, label='Standard Normal')
816
+ ax.plot(x, example_normal, 'blue', linewidth=2, label=f'X ~ N({mu}, {sigma}²)')
817
+
818
+ # format the plot
819
+ ax.set_xlim(-10, 20)
820
+ ax.set_ylim(0, 0.45)
821
+ ax.set_xlabel('x')
822
+ ax.set_ylabel('Probability Density')
823
+ ax.grid(True, alpha=0.3)
824
+ ax.legend()
825
+
826
+ # Decorative text box for parameters
827
+ props = dict(boxstyle='round', facecolor='white', alpha=0.9)
828
+ textstr = '\n'.join((
829
+ r'Normal (aka Gaussian) Random Variable',
830
+ r'',
831
+ rf'Parameter $\mu$: {mu}',
832
+ rf'Parameter $\sigma$: {sigma}'
833
+ ))
834
+ ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=10,
835
+ verticalalignment='top', bbox=props)
836
+
837
+ return comparison_fig
838
+ return (create_distribution_comparison,)
839
+
840
+
841
+ @app.cell(hide_code=True)
842
+ def _(np, plt, stats):
843
+ def create_voltage_example_visualization():
844
+
845
+ # Create data for plotting
846
+ x = np.linspace(-4, 4, 1000)
847
+
848
+ # Signal without noise (X = 2)
849
+ signal_value = 2
850
+
851
+ # Noise distribution (Y ~ N(0, 1))
852
+ noise_pdf = stats.norm.pdf(x, 0, 1)
853
+
854
+ # Signal + Noise distribution (R = X + Y ~ N(2, 1))
855
+ received_pdf = stats.norm.pdf(x, signal_value, 1)
856
+
857
+ # Create figure
858
+ voltage_fig, ax = plt.subplots(figsize=(10, 6))
859
+
860
+ # Plot the noise distribution
861
+ ax.plot(x, noise_pdf, 'blue', linewidth=1.5, alpha=0.6,
862
+ label='Noise: Y ~ N(0, 1)')
863
+
864
+ # received signal distribution
865
+ ax.plot(x, received_pdf, 'red', linewidth=2,
866
+ label=f'Received: R ~ N({signal_value}, 1)')
867
+
868
+ # vertical line at the decision boundary (0.5)
869
+ threshold = 0.5
870
+ ax.axvline(x=threshold, color='green', linestyle='--', linewidth=2,
871
+ label=f'Decision threshold: {threshold}')
872
+
873
+ # Shade the error region
874
+ mask = x < threshold
875
+ error_prob = stats.norm.cdf(threshold, signal_value, 1)
876
+ ax.fill_between(x[mask], received_pdf[mask], color='darkorange', alpha=0.5,
877
+ label=f'Error probability: {error_prob:.3f}')
878
+
879
+ # Styling
880
+ ax.set_title('Voltage Transmission Example: Probability of Error')
881
+ ax.set_xlabel('Voltage')
882
+ ax.set_ylabel('Probability Density')
883
+ ax.legend(loc='upper left')
884
+ ax.grid(alpha=0.3)
885
+
886
+ # Add explanatory annotations
887
+ ax.text(1.5, 0.1, 'When sending "1" (voltage=2),\nthis area represents\nthe error probability',
888
+ bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="black", lw=1))
889
+
890
+ plt.tight_layout()
891
+ plt.gca()
892
+ return voltage_fig, error_prob
893
+ return (create_voltage_example_visualization,)
894
+
895
+
896
+ @app.cell(hide_code=True)
897
+ def _(np, plt, stats):
898
+ def create_cdf_calculator_plot(calc_x, calc_mu, calc_sigma):
899
+
900
+ # Data range for plotting
901
+ x_range = np.linspace(calc_mu - 4*calc_sigma, calc_mu + 4*calc_sigma, 1000)
902
+ pdf = stats.norm.pdf(x_range, calc_mu, calc_sigma)
903
+ cdf = stats.norm.cdf(x_range, calc_mu, calc_sigma)
904
+
905
+ # Calculate the CDF at x
906
+ cdf_at_x = stats.norm.cdf(calc_x, calc_mu, calc_sigma)
907
+
908
+ # Create figure with two subplots
909
+ calc_fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8))
910
+
911
+ # Plot PDF on top subplot
912
+ ax1.plot(x_range, pdf, color='royalblue', linewidth=2, label='PDF')
913
+
914
+ # area shade for P(X ≤ x)
915
+ mask = x_range <= calc_x
916
+ ax1.fill_between(x_range[mask], pdf[mask], color='darkorange', alpha=0.6)
917
+
918
+ # Vertical line at x
919
+ ax1.axvline(x=calc_x, color='red', linestyle='--', linewidth=1.5)
920
+
921
+ # PDF labels and styling
922
+ ax1.set_title(f'Normal PDF with Area P(X ≤ {calc_x:.1f}) Highlighted')
923
+ ax1.set_xlabel('x')
924
+ ax1.set_ylabel('Probability Density')
925
+ ax1.annotate(f'x = {calc_x:.1f}', xy=(calc_x, 0), xytext=(calc_x, -0.01),
926
+ horizontalalignment='center', color='red')
927
+ ax1.grid(alpha=0.3)
928
+
929
+ # CDF on bottom subplot
930
+ ax2.plot(x_range, cdf, color='green', linewidth=2, label='CDF')
931
+
932
+ # Mark the point (x, CDF(x))
933
+ ax2.plot(calc_x, cdf_at_x, 'ro', markersize=8)
934
+
935
+ # CDF labels and styling
936
+ ax2.set_title(f'Normal CDF: F({calc_x:.1f}) = {cdf_at_x:.3f}')
937
+ ax2.set_xlabel('x')
938
+ ax2.set_ylabel('Cumulative Probability')
939
+ ax2.annotate(f'F({calc_x:.1f}) = {cdf_at_x:.3f}',
940
+ xy=(calc_x, cdf_at_x),
941
+ xytext=(calc_x + 0.5*calc_sigma, cdf_at_x - 0.1),
942
+ arrowprops=dict(facecolor='black', width=1, shrink=0.05),
943
+ bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="black", lw=1))
944
+ ax2.grid(alpha=0.3)
945
+
946
+ plt.tight_layout()
947
+ plt.gca()
948
+ return calc_fig, cdf_at_x
949
+ return (create_cdf_calculator_plot,)
950
+
951
+
952
+ @app.cell(hide_code=True)
953
+ def _(np, plt, stats):
954
+ def create_standardization_plot():
955
+
956
+ x = np.linspace(-6, 6, 1000)
957
+
958
+ # Original distribution N(2, 1.5²)
959
+ mu_original, sigma_original = 2, 1.5
960
+ pdf_original = stats.norm.pdf(x, mu_original, sigma_original)
961
+
962
+ # shifted distribution N(0, 1.5²)
963
+ mu_shifted, sigma_shifted = 0, 1.5
964
+ pdf_shifted = stats.norm.pdf(x, mu_shifted, sigma_shifted)
965
+
966
+ # Standard normal N(0, 1)
967
+ mu_standard, sigma_standard = 0, 1
968
+ pdf_standard = stats.norm.pdf(x, mu_standard, sigma_standard)
969
+
970
+ # Create visualization
971
+ stand_fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
972
+
973
+ # Plot on left: Original and shifted distributions
974
+ ax1.plot(x, pdf_original, 'royalblue', linewidth=2,
975
+ label=f'Original: N({mu_original}, {sigma_original}²)')
976
+ ax1.plot(x, pdf_shifted, 'darkorange', linewidth=2,
977
+ label=f'Shifted: N({mu_shifted}, {sigma_shifted}²)')
978
+
979
+ # Add arrow to show the shift
980
+ shift_x1, shift_y1 = mu_original, stats.norm.pdf(mu_original, mu_original, sigma_original)*0.6
981
+ shift_x2, shift_y2 = mu_shifted, stats.norm.pdf(mu_shifted, mu_shifted, sigma_shifted)*0.6
982
+ ax1.annotate('', xy=(shift_x2, shift_y2), xytext=(shift_x1, shift_y1),
983
+ arrowprops=dict(facecolor='black', width=1.5, shrink=0.05))
984
+ ax1.text(0.8, 0.28, 'Subtract μ', transform=ax1.transAxes)
985
+
986
+ # Plot on right: Shifted and standard normal
987
+ ax2.plot(x, pdf_shifted, 'darkorange', linewidth=2,
988
+ label=f'Shifted: N({mu_shifted}, {sigma_shifted}²)')
989
+ ax2.plot(x, pdf_standard, 'green', linewidth=2,
990
+ label=f'Standard: N({mu_standard}, {sigma_standard}²)')
991
+
992
+ # Add arrow to show the scaling
993
+ scale_x1, scale_y1 = 2*sigma_shifted, stats.norm.pdf(2*sigma_shifted, mu_shifted, sigma_shifted)*0.8
994
+ scale_x2, scale_y2 = 2*sigma_standard, stats.norm.pdf(2*sigma_standard, mu_standard, sigma_standard)*0.8
995
+ ax2.annotate('', xy=(scale_x2, scale_y2), xytext=(scale_x1, scale_y1),
996
+ arrowprops=dict(facecolor='black', width=1.5, shrink=0.05))
997
+ ax2.text(0.75, 0.5, 'Divide by σ', transform=ax2.transAxes)
998
+
999
+ # some styling
1000
+ for ax in (ax1, ax2):
1001
+ ax.set_xlabel('x')
1002
+ ax.set_ylabel('Probability Density')
1003
+ ax.grid(alpha=0.3)
1004
+ ax.legend()
1005
+
1006
+ ax1.set_title('Step 1: Shift the Distribution')
1007
+ ax2.set_title('Step 2: Scale the Distribution')
1008
+
1009
+ plt.tight_layout()
1010
+ plt.gca()
1011
+ return stand_fig
1012
+ return (create_standardization_plot,)
1013
+
1014
+
1015
+ @app.cell(hide_code=True)
1016
+ def _(np, plt, stats):
1017
+ def create_probability_example(example_mu=3, example_sigma=4, example_query=0):
1018
+
1019
+ # Create data range
1020
+ x = np.linspace(example_mu - 4*example_sigma, example_mu + 4*example_sigma, 1000)
1021
+ pdf = stats.norm.pdf(x, example_mu, example_sigma)
1022
+
1023
+ # probability calc
1024
+ prob_value = 1 - stats.norm.cdf(example_query, example_mu, example_sigma)
1025
+ ex_z_score = (example_query - example_mu) / example_sigma
1026
+
1027
+ # Create visualization
1028
+ prob_fig, ax = plt.subplots(figsize=(10, 6))
1029
+
1030
+ # Plot PDF
1031
+ ax.plot(x, pdf, 'royalblue', linewidth=2)
1032
+
1033
+ # area shading representing the probability
1034
+ mask = x >= example_query
1035
+ ax.fill_between(x[mask], pdf[mask], color='darkorange', alpha=0.6)
1036
+
1037
+ # Add vertical line at query point
1038
+ ax.axvline(x=example_query, color='red', linestyle='--', linewidth=1.5)
1039
+
1040
+ # Annotations
1041
+ ax.annotate(f'x = {example_query}', xy=(example_query, 0), xytext=(example_query, -0.005),
1042
+ horizontalalignment='center')
1043
+
1044
+ ax.annotate(f'P(X > {example_query}) = {prob_value:.3f}',
1045
+ xy=(example_query + example_sigma, 0.015),
1046
+ xytext=(example_query + 1.5*example_sigma, 0.02),
1047
+ arrowprops=dict(facecolor='black', width=1, shrink=0.05),
1048
+ bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="black", lw=1))
1049
+
1050
+ # Standard normal calculation annotation
1051
+ ax.annotate(f'= P(Z > {ex_z_score:.3f}) = {prob_value:.3f}',
1052
+ xy=(example_query - example_sigma, 0.01),
1053
+ xytext=(example_query - 2*example_sigma, 0.015),
1054
+ arrowprops=dict(facecolor='black', width=1, shrink=0.05),
1055
+ bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="black", lw=1))
1056
+
1057
+ # some styling
1058
+ ax.set_title(f'Example: P(X > {example_query}) where X ~ N({example_mu}, {example_sigma}²)')
1059
+ ax.set_xlabel('x')
1060
+ ax.set_ylabel('Probability Density')
1061
+ ax.grid(alpha=0.3)
1062
+
1063
+ plt.tight_layout()
1064
+ plt.gca()
1065
+ return prob_fig, prob_value, ex_z_score
1066
+ return (create_probability_example,)
1067
+
1068
+
1069
+ @app.cell(hide_code=True)
1070
+ def _(np, plt, stats):
1071
+ def create_range_probability_example(range_mu=3, range_sigma=4, range_lower=2, range_upper=5):
1072
+
1073
+ x = np.linspace(range_mu - 4*range_sigma, range_mu + 4*range_sigma, 1000)
1074
+ pdf = stats.norm.pdf(x, range_mu, range_sigma)
1075
+
1076
+ # probability
1077
+ range_prob = stats.norm.cdf(range_upper, range_mu, range_sigma) - stats.norm.cdf(range_lower, range_mu, range_sigma)
1078
+ range_z_lower = (range_lower - range_mu) / range_sigma
1079
+ range_z_upper = (range_upper - range_mu) / range_sigma
1080
+
1081
+ # Create visualization
1082
+ range_fig, ax = plt.subplots(figsize=(10, 6))
1083
+
1084
+ # Plot PDF
1085
+ ax.plot(x, pdf, 'royalblue', linewidth=2)
1086
+
1087
+ # Shade the area representing the probability
1088
+ mask = (x >= range_lower) & (x <= range_upper)
1089
+ ax.fill_between(x[mask], pdf[mask], color='darkorange', alpha=0.6)
1090
+
1091
+ # Add vertical lines at query points
1092
+ ax.axvline(x=range_lower, color='red', linestyle='--', linewidth=1.5)
1093
+ ax.axvline(x=range_upper, color='red', linestyle='--', linewidth=1.5)
1094
+
1095
+ # Annotations
1096
+ ax.annotate(f'x = {range_lower}', xy=(range_lower, 0), xytext=(range_lower, -0.005),
1097
+ horizontalalignment='center')
1098
+ ax.annotate(f'x = {range_upper}', xy=(range_upper, 0), xytext=(range_upper, -0.005),
1099
+ horizontalalignment='center')
1100
+
1101
+ ax.annotate(f'P({range_lower} < X < {range_upper}) = {range_prob:.3f}',
1102
+ xy=((range_lower + range_upper)/2, max(pdf[mask])/2),
1103
+ xytext=((range_lower + range_upper)/2, max(pdf[mask])*1.5),
1104
+ arrowprops=dict(facecolor='black', width=1, shrink=0.05),
1105
+ bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="black", lw=1),
1106
+ horizontalalignment='center')
1107
+
1108
+ # Standard normal calculation annotation
1109
+ ax.annotate(f'= P({range_z_lower:.3f} < Z < {range_z_upper:.3f}) = {range_prob:.3f}',
1110
+ xy=((range_lower + range_upper)/2, max(pdf[mask])/3),
1111
+ xytext=(range_mu - 2*range_sigma, max(pdf[mask])/1.5),
1112
+ arrowprops=dict(facecolor='black', width=1, shrink=0.05),
1113
+ bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="black", lw=1))
1114
+
1115
+ ax.set_title(f'Example: P({range_lower} < X < {range_upper}) where X ~ N({range_mu}, {range_sigma}²)')
1116
+ ax.set_xlabel('x')
1117
+ ax.set_ylabel('Probability Density')
1118
+ ax.grid(alpha=0.3)
1119
+
1120
+ plt.tight_layout()
1121
+ plt.gca()
1122
+ return range_fig, range_prob, range_z_lower, range_z_upper
1123
+ return (create_range_probability_example,)
1124
+
1125
+
1126
+ if __name__ == "__main__":
1127
+ app.run()
probability/18_central_limit_theorem.py ADDED
@@ -0,0 +1,943 @@
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "marimo",
5
+ # "matplotlib==3.10.1",
6
+ # "scipy==1.15.2",
7
+ # "numpy==2.2.4",
8
+ # "plotly==5.18.0",
9
+ # ]
10
+ # ///
11
+
12
+ import marimo
13
+
14
+ __generated_with = "0.11.30"
15
+ app = marimo.App(width="medium", app_title="Central Limit Theorem")
16
+
17
+
18
+ @app.cell(hide_code=True)
19
+ def _(mo):
20
+ mo.md(
21
+ r"""
22
+ # Central Limit Theorem
23
+
24
+ _This notebook is a computational companion to ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part4/clt/), by Stanford professor Chris Piech._
25
+
26
+ The Central Limit Theorem (CLT) is one of the most important concepts in probability theory and statistics. It explains why many real-world distributions tend to be normal, even when the underlying processes are not.
27
+ """
28
+ )
29
+ return
30
+
31
+
32
+ @app.cell(hide_code=True)
33
+ def _(mo):
34
+ mo.md(
35
+ r"""
36
+ ## Central Limit Theorem Statement
37
+
38
+ There are two ways to state the central limit theorem:
39
+
40
+ ### Sum Version
41
+
42
+ Let $X_1, X_2, \dots, X_n$ be independent and identically distributed random variables. The sum of these random variables approaches a normal distribution as $n \rightarrow \infty$:
43
+
44
+ $$\sum_{i=1}^{n}X_i \sim \mathcal{N}(n \cdot \mu, n \cdot \sigma^2)$$
45
+
46
+ Where $\mu = E[X_i]$ and $\sigma^2 = \text{Var}(X_i)$. Since each $X_i$ is identically distributed, they share the same expectation and variance.
47
+
48
+ ### Average Version
49
+
50
+ Let $X_1, X_2, \dots, X_n$ be independent and identically distributed random variables. The average of these random variables approaches a normal distribution as $n \rightarrow \infty$:
51
+
52
+ $$\frac{1}{n}\sum_{i=1}^{n}X_i \sim \mathcal{N}\left(\mu, \frac{\sigma^2}{n}\right)$$
53
+
54
+ Where $\mu = E[X_i]$ and $\sigma^2 = \text{Var}(X_i)$.
55
+
56
+ The CLT is incredible because it applies to almost any distribution (as long as it has a finite mean and variance), regardless of its shape.
57
+ """
58
+ )
59
+ return
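As a quick numerical illustration of the sum version (a sketch added here, not part of the original notebook; the distribution and constants are chosen arbitrarily), summing n i.i.d. Exponential variables and comparing the empirical mean and variance with the CLT's nμ and nσ²:

```python
import numpy as np

rng = np.random.default_rng(0)
n, lam = 50, 2.0                 # 50 Exponential(lambda = 2) variables per sum
mu, var = 1 / lam, 1 / lam**2    # mean and variance of one X_i

sums = rng.exponential(1 / lam, size=(100_000, n)).sum(axis=1)

print(sums.mean(), n * mu)   # both ≈ 25
print(sums.var(), n * var)   # both ≈ 12.5
```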
60
+
61
+
62
+ @app.cell(hide_code=True)
63
+ def _(mo):
64
+ mo.md(
65
+ r"""
66
+ ## Central Limit Theorem Intuition
67
+
68
+ Let's explore what happens when you add random variables together. For example, what if we add 100 different uniform random variables?
69
+
70
+ ```python
71
+ from random import random
72
+
73
+ def add_100_uniforms():
74
+ total = 0
75
+ for i in range(100):
76
+ # returns a sample from uniform(0, 1)
77
+ x_i = random()
78
+ total += x_i
79
+ return total
80
+ ```
81
+
82
+ The value returned by this function will be a random variable. Click the button below to run the function and observe the resulting value of total:
83
+ """
84
+ )
85
+ return
86
+
87
+
88
+ @app.cell(hide_code=True)
89
+ def _(mo):
90
+ run_button = mo.ui.run_button(label="Run add_100_uniforms()")
91
+
92
+ run_button.center()
93
+ return (run_button,)
94
+
95
+
96
+ @app.cell(hide_code=True)
97
+ def _(mo, random, run_button):
98
+ def add_100_uniforms():
99
+ total = 0
100
+ for i in range(100):
101
+ # returns a sample from uniform(0, 1)
102
+ x_i = random.random()
103
+ total += x_i
104
+ return total
105
+
106
+ # Display the result when the button is clicked
107
+ if run_button.value:
108
+ uniform_result = add_100_uniforms()
109
+ display = mo.md(f"**total**: {uniform_result:.5f}")
110
+ else:
111
+ uniform_result = None  # keep the name defined before the button is first clicked
+ display = mo.md("")
112
+
113
+ display
114
+ return add_100_uniforms, display, uniform_result
115
+
116
+
117
+ @app.cell(hide_code=True)
118
+ def _(mo):
119
+ mo.md(r"""What does total look like as a distribution? Let's calculate total many times and visualize the histogram of values it produces.""")
120
+ return
121
+
122
+
123
+ @app.cell(hide_code=True)
124
+ def _(mo):
125
+ # Simulation control
126
+ run_simulation_button = mo.ui.button(
127
+ value=0,
128
+ on_click=lambda value: value + 1,
129
+ label="Run 10,000 more samples",
130
+ kind="warn"
131
+ )
132
+
133
+ run_simulation_button.center()
134
+ return (run_simulation_button,)
135
+
136
+
137
+ @app.cell(hide_code=True)
138
+ def _(add_100_uniforms, go, mo, np, run_simulation_button, stats, time):
139
+ # store the results
140
+ def get_simulation_results():
141
+ if not hasattr(get_simulation_results, "results"):
142
+ get_simulation_results.results = []
143
+ get_simulation_results.last_button_value = -1 # track button clicks
144
+ return get_simulation_results
145
+
146
+ # grab the results
147
+ sim_storage = get_simulation_results()
148
+ simulation_results = sim_storage.results
149
+
150
+ # Check if button was clicked (value changed)
151
+ if run_simulation_button.value != sim_storage.last_button_value:
152
+ # Update the last seen button value
153
+ sim_storage.last_button_value = run_simulation_button.value
154
+
155
+ with mo.status.spinner(title="Running simulation...") as progress_status:
156
+ sim_count = 10000
157
+ new_results = []
158
+ for _ in mo.status.progress_bar(range(sim_count)):
159
+ sim_result = add_100_uniforms()
160
+ new_results.append(sim_result)
161
+ time.sleep(0.0001) # tiny pause
162
+
163
+ simulation_results.extend(new_results)
164
+
165
+ progress_status.update(f"✅ Added {sim_count:,} samples (total: {len(simulation_results):,})")
166
+
167
+ if simulation_results:
168
+ # Numbers
169
+ mean = np.mean(simulation_results)
170
+ std_dev = np.std(simulation_results)
171
+
172
+ theoretical_mean = 100 * 0.5 # = 50
173
+ theoretical_variance = 100 * (1/12) # = 8.33...
174
+ theoretical_std = np.sqrt(theoretical_variance) # ≈ 2.89
175
+
176
+ # should be 10k times the click number (mainly for the y-axis label)
177
+ total_samples = run_simulation_button.value * 10000
178
+
179
+ fig = go.Figure()
180
+
181
+ # histogram of samples
182
+ fig.add_trace(go.Histogram(
183
+ x=simulation_results,
184
+ histnorm='probability density',
185
+ name='Sum Distribution',
186
+ marker_color='royalblue',
187
+ opacity=0.7
188
+ ))
189
+
190
+ x_vals = np.linspace(min(simulation_results), max(simulation_results), 1000)
191
+ y_vals = stats.norm.pdf(x_vals, theoretical_mean, theoretical_std)
192
+
193
+ fig.add_trace(go.Scatter(
194
+ x=x_vals,
195
+ y=y_vals,
196
+ mode='lines',
197
+ name='Normal approximation',
198
+ line=dict(color='red', width=2)
199
+ ))
200
+
201
+ fig.add_vline(
202
+ x=mean,
203
+ line_dash="dash",
204
+ line_width=1.5,
205
+ line_color="green",
206
+ annotation_text=f"Sample Mean: {mean:.2f}",
207
+ annotation_position="top right"
208
+ )
209
+
210
+ # some notes
211
+ fig.add_annotation(
212
+ x=0.02, y=0.95,
213
+ xref="paper", yref="paper",
214
+ text=f"Sum of 100 Uniform(0,1) variables<br>" +
215
+ f"Sample size: {total_samples:,}<br>" +
216
+ f"Sample mean: {mean:.2f} (expected: {theoretical_mean})<br>" +
217
+ f"Sample std: {std_dev:.2f} (expected: {theoretical_std:.2f})<br>" +
218
+ f"According to CLT: Normal({theoretical_mean}, {theoretical_variance:.2f})",
219
+ showarrow=False,
220
+ align="left",
221
+ bgcolor="white",
222
+ opacity=0.8
223
+ )
224
+
225
+ fig.update_layout(
226
+ title=f'Distribution of Sum of 100 Uniforms (Click #{run_simulation_button.value})',
227
+ xaxis_title='Values',
228
+ yaxis_title=f'Probability Density ({total_samples:,} runs)',
229
+ template='plotly_white',
230
+ height=500
231
+ )
232
+
233
+ # show
234
+ histogram = mo.ui.plotly(fig)
235
+ else:
236
+ histogram = mo.md("Click the button to run the simulation!")
237
+
238
+ # display
239
+ histogram
240
+ return (
241
+ fig,
242
+ get_simulation_results,
243
+ histogram,
244
+ mean,
245
+ new_results,
246
+ progress_status,
247
+ sim_count,
248
+ sim_result,
249
+ sim_storage,
250
+ simulation_results,
251
+ std_dev,
252
+ theoretical_mean,
253
+ theoretical_std,
254
+ theoretical_variance,
255
+ total_samples,
256
+ x_vals,
257
+ y_vals,
258
+ )
259
+
260
+
261
+ @app.cell(hide_code=True)
262
+ def _(mo):
263
+ mo.md(
264
+ r"""
265
+ That is interesting! The sum of 100 independent uniforms looks normal. Is that a special property of uniforms? No! It turns out to work for almost any type of distribution (as long as the distribution has finite mean and variance).
266
+
267
+ - Sum of 40 $X_i$ where $X_i \sim \text{Beta}(a = 5, b = 4)$? Normal.
268
+ - Sum of 90 $X_i$ where $X_i \sim \text{Poisson}(\lambda = 4)$? Normal.
269
+ - Sum of 50 dice-rolls? Normal.
270
+ - Average of 10000 $X_i$ where $X_i \sim \text{Exp}(\lambda = 8)$? Normal.
271
+
272
+ For any distribution, the sum or average of a sufficiently large number of independent, identically distributed random variables will be approximately normally distributed.
273
+ """
274
+ )
275
+ return
276
+
277
+
278
+ @app.cell(hide_code=True)
279
+ def _(mo):
280
+ mo.md(
281
+ r"""
282
+ ## Continuity Correction
283
+
284
+ When using the Central Limit Theorem with discrete random variables (like a Binomial or Poisson), we need to apply a continuity correction. This is because we're approximating a discrete distribution with a continuous one (normal).
285
+
286
+ The continuity correction involves adjusting the boundaries in probability calculations by ±0.5 to account for the discrete nature of the original variable.
287
+
288
+ You should use a continuity correction any time your normal is approximating a discrete random variable. The rules for a general continuity correction are the same as the rules for the [binomial-approximation continuity correction](http://marimo.app/https://github.com/marimo-team/learn/blob/main/probability/14_binomial_distribution.py).
289
+
290
+ In our example above, where we added 100 uniforms, a continuity correction isn't needed because the sum of uniforms is continuous. However, in examples with dice or other discrete distributions, a continuity correction would be necessary.
291
+ """
292
+ )
293
+ return
294
+
295
+
296
+ @app.cell(hide_code=True)
297
+ def _(mo):
298
+ mo.md(
299
+ r"""
300
+ ## Examples
301
+
302
+ Let's work through some practical examples to see how the Central Limit Theorem is applied.
303
+ """
304
+ )
305
+ return
306
+
307
+
308
+ @app.cell(hide_code=True)
309
+ def _(mo):
310
+ mo.md(
311
+ r"""
312
+ ### Example 1: Dice Game
313
+
314
+ You will roll a 6-sided dice 10 times. Let $X$ be the total value of all 10 dice: $X = X_1 + X_2 + \dots + X_{10}$. You win the game if $X \leq 25$ or $X \geq 45$. Use the central limit theorem to calculate the probability that you win.
315
+
316
+ Recall that for a single die roll $X_i$:
317
+
318
+ - $E[X_i] = 3.5$
319
+ - $\text{Var}(X_i) = \frac{35}{12}$
320
+
321
+ **Solution:**
322
+
323
+ Let $Y$ be the approximating normal distribution. By the Central Limit Theorem:
324
+
325
+ $$Y \sim \mathcal{N}(10 \cdot E[X_i], 10 \cdot \text{Var}(X_i))$$
326
+
327
+ Substituting in the known values:
328
+
329
+ $$Y \sim \mathcal{N}\left(10 \cdot 3.5, 10 \cdot \frac{35}{12}\right) = \mathcal{N}(35, 29.2)$$
330
+
331
+ Now we calculate the probability:
332
+
333
+ $$P(X \leq 25 \text{ or } X \geq 45)$$
334
+
335
+ $$= P(X \leq 25) + P(X \geq 45)$$
336
+
337
+ $$\approx P(Y < 25.5) + P(Y > 44.5) \text{ (Continuity Correction)}$$
338
+
339
+ $$\approx P(Y < 25.5) + [1 - P(Y < 44.5)]$$
340
+
341
+ $$\approx \Phi\left(\frac{25.5 - 35}{\sqrt{29.2}}\right) + \left[1 - \Phi\left(\frac{44.5 - 35}{\sqrt{29.2}}\right)\right]$$
342
+
343
+ $$\approx \Phi(-1.76) + [1 - \Phi(1.76)]$$
344
+
345
+ $$\approx 0.039 + (1 - 0.961)$$
346
+
347
+ $$\approx 0.078$$
348
+ So, the probability of winning the game is approximately 7.8%.
349
+ """
350
+ )
351
+ return
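The 7.8% figure can be sanity-checked numerically (a sketch added for illustration, not part of the notebook), both with the CLT approximation used above and with an exact convolution of the dice PMF:

```python
import numpy as np
from scipy import stats

# CLT approximation with continuity correction
mu, var = 10 * 3.5, 10 * 35 / 12
sigma = np.sqrt(var)
p_clt = stats.norm.cdf(25.5, mu, sigma) + (1 - stats.norm.cdf(44.5, mu, sigma))

# Exact answer: convolve the PMF of one fair die with itself ten times
die = np.full(6, 1 / 6)
pmf = np.array([1.0])
for _ in range(10):
    pmf = np.convolve(pmf, die)
support = np.arange(10, 61)  # possible totals of 10 dice
p_exact = pmf[(support <= 25) | (support >= 45)].sum()

print(round(p_clt, 3), round(p_exact, 3))  # both close to the 7.8% computed above
```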
352
+
353
+
354
+ @app.cell(hide_code=True)
355
+ def _(create_dice_game_visualization, fig_to_image, mo):
356
+ # Display visualization
357
+ dice_game_fig = create_dice_game_visualization()
358
+ dice_game_image = mo.image(fig_to_image(dice_game_fig), width="100%")
359
+
360
+ dice_explanation = mo.md(
361
+ r"""
362
+ **Visualization Explanation:**
363
+
364
+ The graph shows the distribution of the sum of 10 dice rolls. The blue bars represent the actual probability mass function (PMF), while the red curve shows the normal approximation using the Central Limit Theorem.
365
+
366
+ The winning regions are shaded in orange:
367
+ - The left region where $X \leq 25$
368
+ - The right region where $X \geq 45$
369
+
370
+ The total probability of these regions is approximately 0.078 or 7.8%.
371
+
372
+ Notice how the normal approximation provides a good fit to the discrete distribution, demonstrating the power of the Central Limit Theorem.
373
+ """
374
+ )
375
+
376
+ mo.vstack([dice_game_image, dice_explanation])
377
+ return dice_explanation, dice_game_fig, dice_game_image
378
+
379
+
380
+ @app.cell(hide_code=True)
381
+ def _(mo):
382
+ mo.md(
383
+ r"""
384
+ ### Example 2: Algorithm Runtime Estimation
385
+
386
+ Say you have a new algorithm and you want to test its running time. You know the variance of the algorithm's run time is $\sigma^2 = 4 \text{ sec}^2$, but you want to estimate the mean run time $t$ in seconds.
387
+
388
+ You can run the algorithm repeatedly (IID trials). How many trials do you have to run so that your estimated runtime is within ±0.5 seconds of $t$ with 95% certainty?
389
+
390
+ Let $X_i$ be the run time of the $i$-th run (for $1 \leq i \leq n$).
391
+
392
+ **Solution:**
393
+
394
+ We need to find $n$ such that:
395
+
396
+ $$0.95 = P\left(-0.5 \leq \frac{\sum_{i=1}^n X_i}{n} - t \leq 0.5\right)$$
397
+
398
+ By the central limit theorem, the sample mean follows a normal distribution.
399
+ We can standardize this to work with the standard normal:
400
+
401
+ $$Z = \frac{\left(\sum_{i=1}^n X_i\right) - n\mu}{\sigma \sqrt{n}}$$
402
+
403
+ $$= \frac{\left(\sum_{i=1}^n X_i\right) - nt}{2 \sqrt{n}}$$
404
+
405
+ Rewriting our probability inequality so that the central term is $Z$:
406
+
407
+ $$0.95 = P\left(-0.5 \leq \frac{\sum_{i=1}^n X_i}{n} - t \leq 0.5\right)$$
408
+
409
+ $$= P\left(\frac{-0.5 \sqrt{n}}{2} \leq Z \leq \frac{0.5 \sqrt{n}}{2}\right)$$
410
+
411
+ And now we find the value of $n$ that makes this equation hold:
412
+
413
+ $$0.95 = \Phi\left(\frac{\sqrt{n}}{4}\right) - \Phi\left(-\frac{\sqrt{n}}{4}\right)$$
414
+
415
+ $$= \Phi\left(\frac{\sqrt{n}}{4}\right) - \left(1 - \Phi\left(\frac{\sqrt{n}}{4}\right)\right)$$
416
+
417
+ $$= 2\Phi\left(\frac{\sqrt{n}}{4}\right) - 1$$
418
+
419
+ Solving for $\Phi\left(\frac{\sqrt{n}}{4}\right)$:
420
+
421
+ $$0.975 = \Phi\left(\frac{\sqrt{n}}{4}\right)$$
422
+
423
+ $$\Phi^{-1}(0.975) = \frac{\sqrt{n}}{4}$$
424
+
425
+ $$1.96 = \frac{\sqrt{n}}{4}$$
426
+
427
+ $$n = 61.4$$
428
+
429
+ Therefore, we need to run the algorithm 62 times to estimate the mean runtime within ±0.5 seconds with 95% confidence.
430
+ """
431
+ )
432
+ return
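The sample-size calculation above can be reproduced directly (a short sketch, not part of the diff, assuming SciPy's `norm.ppf` for Φ⁻¹):

```python
import math
from scipy import stats

sigma = 2.0                # sqrt of the known variance, 4 sec^2
margin = 0.5               # desired half-width of the estimate, in seconds
z = stats.norm.ppf(0.975)  # Phi^{-1}(0.975) ≈ 1.96 for 95% confidence

n = (z * sigma / margin) ** 2
print(round(n, 1), math.ceil(n))  # ≈ 61.5, so run 62 trials
```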
433
+
434
+
435
+ @app.cell(hide_code=True)
436
+ def _(create_algorithm_runtime_visualization, fig_to_image, mo):
437
+ # Display visualization
438
+ runtime_fig = create_algorithm_runtime_visualization()
439
+ runtime_image = mo.image(fig_to_image(runtime_fig), width="100%")
440
+
441
+ runtime_explanation = mo.md(
442
+ r"""
443
+ **Visualization Explanation:**
444
+
445
+ The graph illustrates how the standard error of the mean (SEM) decreases as the number of trials increases. The standard error is calculated as $\frac{\sigma}{\sqrt{n}}$.
446
+
447
+ - When we conduct 62 trials, the standard error is approximately 0.254 seconds.
448
+ - With a 95% confidence level, this gives us a margin of error of about ±0.5 seconds (1.96 × 0.254 ≈ 0.5).
449
+ - The shaded region shows how the confidence interval narrows as the number of trials increases.
450
+
451
+ This demonstrates why 62 trials are sufficient to meet our requirements of estimating the mean runtime within ±0.5 seconds with 95% confidence.
452
+ """
453
+ )
454
+
455
+ mo.vstack([runtime_image, runtime_explanation])
456
+ return runtime_explanation, runtime_fig, runtime_image
457
+
458
+
459
+ @app.cell(hide_code=True)
460
+ def _(mo):
461
+ mo.md(
462
+ r"""
463
+ ## Interactive CLT Explorer
464
+
465
+ Let's explore how the Central Limit Theorem works with different underlying distributions. You can select a distribution type and see how the distribution of the sample mean changes as the sample size increases.
466
+ """
467
+ )
468
+ return
469
+
470
+
471
+ @app.cell(hide_code=True)
472
+ def _(controls):
473
+ controls
474
+ return
475
+
476
+
477
+ @app.cell(hide_code=True)
478
+ def _(
479
+ distribution_type,
480
+ fig_to_image,
481
+ mo,
482
+ np,
483
+ plt,
484
+ run_explorer_button,
485
+ sample_size,
486
+ sim_count_slider,
487
+ stats,
488
+ ):
489
+ # Run simulation when button is clicked
490
+ if run_explorer_button.value:
491
+ # Set distribution parameters based on selection
492
+ if distribution_type.value == "uniform":
493
+ dist_name = "Uniform(0, 1)"
494
+ # For uniform(0,1): mean = 0.5, variance = 1/12
495
+ true_mean = 0.5
496
+ true_var = 1/12
497
+
498
+ # generate samples
499
+ def generate_sample():
500
+ return np.random.uniform(0, 1, sample_size.value)
501
+
502
+ elif distribution_type.value == "exponential":
503
+ rate = 1.0
504
+ dist_name = f"Exponential(λ={rate})"
505
+ # For exponential(λ): mean = 1/λ, variance = 1/λ²
506
+ true_mean = 1/rate
507
+ true_var = 1/(rate**2)
508
+
509
+ def generate_sample():
510
+ return np.random.exponential(1/rate, sample_size.value)
511
+
512
+ elif distribution_type.value == "binomial":
513
+ n_param, p = 10, 0.3
514
+ dist_name = f"Binomial(n={n_param}, p={p})"
515
+ # For binomial(n,p): mean = np, variance = np(1-p)
516
+ true_mean = n_param * p
517
+ true_var = n_param * p * (1-p)
518
+
519
+ def generate_sample():
520
+ return np.random.binomial(n_param, p, sample_size.value)
521
+
522
+ elif distribution_type.value == "poisson":
523
+ rate = 3.0
524
+ dist_name = f"Poisson(λ={rate})"
525
+ # For poisson(λ): mean = λ, variance = λ
526
+ true_mean = rate
527
+ true_var = rate
528
+
529
+ def generate_sample():
530
+ return np.random.poisson(rate, sample_size.value)
531
+
532
+ # Generate the simulation data using a spinner for progress
533
+ with mo.status.spinner(title="Running simulation...") as explorer_progress:
534
+ sample_means = []
535
+ original_samples = []
536
+
537
+ # Run simulations
538
+ for _ in mo.status.progress_bar(range(sim_count_slider.value)):
539
+ sample = generate_sample()
540
+
541
+ # Store the first simulation's individual values for visualizing original distribution
542
+ if len(original_samples) < 1000: # limit to prevent memory issues
543
+ original_samples.extend(sample)
544
+
545
+ # sample mean
546
+ sample_means.append(np.mean(sample))
547
+
548
+ # progress
549
+ explorer_progress.update(f"✅ Completed {sim_count_slider.value:,} simulations")
550
+
551
+ # Create visualization
552
+ explorer_fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))
553
+
554
+ # Original distribution histogram
555
+ ax1.hist(original_samples, bins=30, density=True, alpha=0.7, color='royalblue')
556
+ ax1.set_title(f"Original Distribution: {dist_name}")
557
+
558
+ # Theoretical mean line
559
+ ax1.axvline(x=true_mean, color='red', linestyle='--',
560
+ label=f'True Mean = {true_mean:.3f}')
561
+
562
+ ax1.set_xlabel("Value")
563
+ ax1.set_ylabel("Density")
564
+ ax1.legend()
565
+
566
+ # Sample means histogram and normal approximation
567
+ sample_mean_mean = np.mean(sample_means)
568
+ sample_mean_std = np.std(sample_means)
569
+ expected_std = np.sqrt(true_var / sample_size.value) # CLT prediction
570
+
571
+ ax2.hist(sample_means, bins=30, density=True, alpha=0.7, color='forestgreen',
572
+ label=f'Sample Size = {sample_size.value}')
573
+
574
+ # Normal approximation from CLT
575
+ explorer_x = np.linspace(min(sample_means), max(sample_means), 1000)
576
+ explorer_y = stats.norm.pdf(explorer_x, true_mean, expected_std)
577
+ ax2.plot(explorer_x, explorer_y, 'r-', linewidth=2, label='CLT Normal Approximation')
578
+
579
+ # Add mean line
580
+ ax2.axvline(x=true_mean, color='purple', linestyle='--',
581
+ label=f'True Mean = {true_mean:.3f}')
582
+
583
+ ax2.set_title(f"Distribution of Sample Means\n(CLT Prediction: N({true_mean:.3f}, {true_var/sample_size.value:.5f}))")
584
+ ax2.set_xlabel("Sample Mean")
585
+ ax2.set_ylabel("Density")
586
+ ax2.legend()
587
+
588
+ # Add CLT description
589
+ explorer_fig.text(0.5, 0.01,
590
+ f"Central Limit Theorem: As sample size increases, the distribution of sample means approaches\n" +
591
+ f"a normal distribution with mean = {true_mean:.3f} and variance = {true_var:.3f}/{sample_size.value} = {true_var/sample_size.value:.5f}",
592
+ ha='center', fontsize=10, bbox=dict(facecolor='white', alpha=0.8))
593
+
594
+ plt.tight_layout(rect=[0, 0.05, 1, 1])
595
+
596
+ # Display plot
597
+ explorer_image = mo.image(fig_to_image(explorer_fig), width="100%")
598
+ else:
599
+ explorer_image = mo.md("Click the 'Run Simulation' button to see how the Central Limit Theorem works.")
600
+
601
+ explorer_image
602
+ return (
603
+ ax1,
604
+ ax2,
605
+ dist_name,
606
+ expected_std,
607
+ explorer_fig,
608
+ explorer_image,
609
+ explorer_progress,
610
+ explorer_x,
611
+ explorer_y,
612
+ generate_sample,
613
+ n_param,
614
+ original_samples,
615
+ p,
616
+ rate,
617
+ sample,
618
+ sample_mean_mean,
619
+ sample_mean_std,
620
+ sample_means,
621
+ true_mean,
622
+ true_var,
623
+ )
624
+
625
+
626
+ @app.cell(hide_code=True)
627
+ def _(mo):
628
+ mo.md(
629
+ r"""
630
+ ## 🤔 Test Your Understanding
631
+
632
+ /// details | What is the shape of the distribution of the sum of many independent random variables?
633
+ The sum of many independent random variables approaches a normal distribution, regardless of the shape of the original distributions (as long as they have finite mean and variance). This is the essence of the Central Limit Theorem.
634
+ ///
635
+
636
+ /// details | If $X_1, X_2, \dots, X_{100}$ are IID random variables with $E[X_i] = 5$ and $Var(X_i) = 9$, what is the distribution of their sum?
637
+ By the Central Limit Theorem, the sum $S = X_1 + X_2 + \dots + X_{100}$ follows a normal distribution with:
638
+
639
+ - Mean: $E[S] = 100 \cdot E[X_i] = 100 \cdot 5 = 500$
640
+ - Variance: $Var(S) = 100 \cdot Var(X_i) = 100 \cdot 9 = 900$
641
+
642
+ Therefore, $S \sim \mathcal{N}(500, 900)$, or equivalently $S \sim \mathcal{N}(500, 30^2)$.
643
+ ///
644
+
645
+ /// details | When do you need to apply a continuity correction when using the Central Limit Theorem?
646
+ You need to apply a continuity correction when you're using the normal approximation (through CLT) for a discrete random variable.
647
+
648
+ For example, when approximating a binomial or Poisson distribution with a normal distribution, you should adjust boundaries by ±0.5 to account for the discrete nature of the original variable. This makes the approximation more accurate.
649
+ ///
650
+
651
+ /// details | If $X_1, X_2, \dots, X_{n}$ are IID random variables, how does the variance of their sample mean $\bar{X} = \frac{1}{n}\sum_{i=1}^{n}X_i$ change as $n$ increases?
652
+ The variance of the sample mean decreases as the sample size $n$ increases. Specifically:
653
+
654
+ $Var(\bar{X}) = \frac{Var(X_i)}{n}$
655
+
656
+ This means that as we take more samples, the sample mean becomes more concentrated around the true mean of the distribution. This is why larger samples give more precise estimates.
657
+ ///
658
+
659
+ /// details | Why is the Central Limit Theorem so important in statistics?
660
+ The Central Limit Theorem is foundational in statistics because:
661
+
662
+ 1. It allows us to make inferences about population parameters using sample statistics, regardless of the population's distribution.
663
+ 2. It explains why the normal distribution appears so frequently in natural phenomena.
664
+ 3. It enables the construction of confidence intervals and hypothesis tests for means, even when the underlying population distribution is unknown.
665
+ 4. It justifies many statistical methods that assume normality, even when working with non-normal data, provided the sample size is large enough.
666
+
667
+ In essence, the CLT provides the theoretical justification for much of statistical inference.
668
+ ///
669
+ """
670
+ )
671
+ return
672
+
673
+
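The second question above can also be checked by simulation. A small sketch, assuming for concreteness that each $X_i$ is Normal(5, 9); by the CLT the particular choice of distribution matters little for the shape of the sum:

```python
import numpy as np

rng = np.random.default_rng(0)
n_vars, n_sims = 100, 100_000

# Each row is one realization of X_1 + ... + X_100 with E[X_i] = 5, Var(X_i) = 9
sums = rng.normal(loc=5, scale=3, size=(n_sims, n_vars)).sum(axis=1)

print(sums.mean())  # ~500
print(sums.var())   # ~900
```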
674
+ @app.cell(hide_code=True)
675
+ def _(mo):
676
+ mo.md(r"""## Appendix (helper code and functions)""")
677
+ return
678
+
679
+
680
+ @app.cell
681
+ def _():
682
+ import marimo as mo
683
+ return (mo,)
684
+
685
+
686
+ @app.cell(hide_code=True)
687
+ def _():
688
+ from wigglystuff import TangleSlider
689
+ return (TangleSlider,)
690
+
691
+
692
+ @app.cell(hide_code=True)
693
+ def _():
694
+ # Import libraries
695
+ import numpy as np
696
+ import matplotlib.pyplot as plt
697
+ from scipy import stats
698
+ import io
699
+ import base64
700
+ import random
701
+ import time
702
+ import plotly.graph_objects as go
703
+ import plotly.io as pio
704
+ return base64, go, io, np, pio, plt, random, stats, time
705
+
706
+
707
+ @app.cell(hide_code=True)
708
+ def _(base64, io):
709
+ from matplotlib.figure import Figure
710
+
711
+ # Helper function to convert matplotlib figures to images
712
+ def fig_to_image(fig):
713
+ buf = io.BytesIO()
714
+ fig.savefig(buf, format='png', bbox_inches='tight')
715
+ buf.seek(0)
716
+ img_str = base64.b64encode(buf.getvalue()).decode('utf-8')
717
+ return f"data:image/png;base64,{img_str}"
718
+ return Figure, fig_to_image
719
+
720
+
721
+ @app.cell(hide_code=True)
722
+ def _(np, plt, stats):
723
+ def create_dice_game_visualization():
724
+ """Create a visualization for the dice game example."""
725
+ # Parameters
726
+ n_dice = 10
727
+ dice_values = np.arange(1, 7) # 1 to 6
728
+
729
+ # Theoretical values
730
+ single_die_mean = np.mean(dice_values) # 3.5
731
+ single_die_var = np.var(dice_values) # 35/12
732
+
733
+ # Sum distribution parameters
734
+ sum_mean = n_dice * single_die_mean
735
+ sum_var = n_dice * single_die_var
736
+ sum_std = np.sqrt(sum_var)
737
+
738
+ # Possible outcomes for the sum of 10 dice
739
+ min_sum = n_dice * min(dice_values) # 10
740
+ max_sum = n_dice * max(dice_values) # 60
741
+ sum_values = np.arange(min_sum, max_sum + 1)
742
+
743
+ # Create figure
744
+ fig, ax = plt.subplots(figsize=(10, 6))
745
+
746
+ # Calculate PMF through convolution
747
+ # For one die
748
+ single_pmf = np.ones(6) / 6
749
+
750
+ sum_pmf = single_pmf.copy()
751
+ for _ in range(n_dice - 1):
752
+ sum_pmf = np.convolve(sum_pmf, single_pmf)
753
+
754
+ # Plot the PMF
755
+ ax.bar(sum_values, sum_pmf, alpha=0.7, color='royalblue', label='Exact PMF')
756
+
757
+ # Normal approximation
758
+ x = np.linspace(min_sum - 5, max_sum + 5, 1000)
759
+ y = stats.norm.pdf(x, sum_mean, sum_std)
760
+ ax.plot(x, y, 'r-', linewidth=2, label='Normal Approximation')
761
+
762
+ # Win conditions (x ≤ 25 or x ≥ 45)
763
+ win_region_left = sum_values <= 25
764
+ win_region_right = sum_values >= 45
765
+
766
+ # Shade win regions
767
+ ax.bar(sum_values[win_region_left], sum_pmf[win_region_left],
768
+ color='darkorange', alpha=0.7, label='Win Region')
769
+ ax.bar(sum_values[win_region_right], sum_pmf[win_region_right],
770
+ color='darkorange', alpha=0.7)
771
+
772
+ # Calculate win probability
773
+ win_prob = np.sum(sum_pmf[win_region_left]) + np.sum(sum_pmf[win_region_right])
774
+
775
+ # Add vertical lines for critical values
776
+ ax.axvline(x=25.5, color='red', linestyle='--', linewidth=1.5, label='Critical Points')
777
+ ax.axvline(x=44.5, color='red', linestyle='--', linewidth=1.5)
778
+
779
+ # Add mean line
780
+ ax.axvline(x=sum_mean, color='green', linestyle='--', linewidth=1.5,
781
+ label=f'Mean = {sum_mean}')
782
+
783
+ # Text box with relevant information
784
+ textstr = '\n'.join((
785
+ f'Number of dice: {n_dice}',
786
+ f'Sum Mean: {sum_mean}',
787
+ f'Sum Std Dev: {sum_std:.2f}',
788
+ f'Win Probability: {win_prob:.4f}',
789
+ f'CLT Approximation: {0.078:.4f}'
790
+ ))
791
+ props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
792
+ ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=10,
793
+ verticalalignment='top', bbox=props)
794
+
795
+ # Formatting
796
+ ax.set_xlabel('Sum of 10 Dice')
797
+ ax.set_ylabel('Probability')
798
+ ax.set_title('Central Limit Theorem: Dice Game Example')
799
+ ax.legend()
800
+ ax.grid(alpha=0.3)
801
+
802
+ plt.tight_layout()
803
+ plt.gca()
804
+ return fig
805
+ return (create_dice_game_visualization,)
806
+
807
+
808
+ @app.cell(hide_code=True)
809
+ def _(np, plt):
810
+ def create_algorithm_runtime_visualization():
811
+ """Create a visualization for the algorithm runtime example."""
812
+ # Parameters
813
+ variance = 4 # σ² = 4 sec²
814
+ std_dev = np.sqrt(variance) # σ = 2 sec
815
+ confidence_level = 0.95
816
+ z_score = 1.96 # for 95% confidence
817
+ target_error = 0.5 # ±0.5 seconds
818
+
819
+ # Calculate n needed for desired precision
820
+ n_required = int(np.ceil((z_score * std_dev / target_error) ** 2)) # ≈ 62
821
+
822
+ n_values = np.arange(1, 100)
823
+
824
+ # standard error
825
+ standard_errors = std_dev / np.sqrt(n_values)
826
+
827
+ # margin of error
828
+ margins_of_error = z_score * standard_errors
829
+
830
+ # Create figure
831
+ fig, ax = plt.subplots(figsize=(10, 6))
832
+
833
+ # standard error vs sample size plot
834
+ ax.plot(n_values, standard_errors, 'b-', linewidth=2, label='Standard Error of Mean')
835
+
836
+ # Plot margin of error vs sample size
837
+ ax.plot(n_values, margins_of_error, 'r--', linewidth=2,
838
+ label=f'{confidence_level*100}% Margin of Error')
839
+
840
+ ax.axvline(x=n_required, color='green', linestyle='-', linewidth=1.5,
841
+ label=f'Required n = {n_required}')
842
+
843
+ ax.axhline(y=target_error, color='purple', linestyle='--', linewidth=1.5,
844
+ label=f'Target Error = ±{target_error} sec')
845
+
846
+ # Shade the region below target error
847
+ ax.fill_between(n_values, 0, target_error, alpha=0.2, color='green')
848
+
849
+ # intersection point
850
+ ax.plot(n_required, target_error, 'ro', markersize=8)
851
+ ax.annotate(f'({n_required}, {target_error} sec)',
852
+ xy=(n_required, target_error),
853
+ xytext=(n_required + 5, target_error + 0.1),
854
+ arrowprops=dict(facecolor='black', shrink=0.05, width=1))
855
+
856
+ # Text box with appropriate information
857
+ textstr = '\n'.join((
858
+ f'Algorithm Variance: {variance} sec²',
859
+ f'Standard Deviation: {std_dev} sec',
860
+ f'Confidence Level: {confidence_level*100}%',
861
+ f'Z-score: {z_score}',
862
+ f'Target Error: ±{target_error} sec',
863
+ f'Required Sample Size: {n_required}'
864
+ ))
865
+ props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
866
+ ax.text(0.05, 0.95, textstr, transform=ax.transAxes, fontsize=10,
867
+ verticalalignment='top', bbox=props)
868
+
869
+ # Formatting
870
+ ax.set_xlabel('Sample Size (n)')
871
+ ax.set_ylabel('Error (seconds)')
872
+ ax.set_title('Sample Size Determination for Algorithm Runtime Estimation')
873
+ ax.set_xlim(0, 100)
874
+ ax.set_ylim(0, 2)
875
+ ax.legend()
876
+ ax.grid(alpha=0.3)
877
+
878
+ plt.tight_layout()
879
+ return fig
880
+ return (create_algorithm_runtime_visualization,)
881
+
882
+
883
+ @app.cell(hide_code=True)
884
+ def _(mo):
885
+ mo.md(
886
+ r"""
887
+ ## Summary
888
+
889
+ The Central Limit Theorem is truly one of the most remarkable ideas in all of statistics. It tells us that when we add up many independent random variables, their sum will follow a normal distribution, regardless of what the original distributions looked like. This is why we see normal distributions so often in real life – many natural phenomena are the result of numerous small, independent factors adding up.
890
+
891
+ What makes the CLT so powerful is its universality. Whether we're working with dice rolls, measurement errors, or stock market returns, as long as we have enough independent samples, their average or sum will be approximately normal. For sums, the distribution will be $\mathcal{N}(n\mu, n\sigma^2)$, and for averages, it's $\mathcal{N}(\mu, \frac{\sigma^2}{n})$.
892
+
893
+ The CLT gives us the foundation for confidence intervals, hypothesis testing, and many other statistical tools. Without it, we'd have a much harder time making sense of data when we don't know the underlying population distribution. Just remember that if you're working with discrete distributions, you'll need to apply a continuity correction to get more accurate results.
894
+
895
+ Next time you see a normal distribution in data, think about the Central Limit Theorem – it might be the reason behind that familiar bell curve!
896
+ """
897
+ )
898
+ return
899
+
900
+
901
+ @app.cell(hide_code=True)
902
+ def _(mo):
903
+ # controls for the interactive explorer
904
+ distribution_type = mo.ui.dropdown(
905
+ options=["uniform", "exponential", "binomial", "poisson"],
906
+ value="uniform",
907
+ label="Distribution Type"
908
+ )
909
+
910
+ sample_size = mo.ui.slider(
911
+ start=1,
912
+ stop=100,
913
+ step=1,
914
+ value=30,
915
+ label="Sample Size (n)"
916
+ )
917
+
918
+ sim_count_slider = mo.ui.slider(
919
+ start=100,
920
+ stop=10000,
921
+ step=100,
922
+ value=1000,
923
+ label="Number of Simulations"
924
+ )
925
+
926
+ run_explorer_button = mo.ui.run_button(label="Run Simulation", kind="warn")
927
+
928
+ controls = mo.hstack([
929
+ mo.vstack([distribution_type, sample_size, sim_count_slider]),
930
+ run_explorer_button
931
+ ], justify='space-around')
932
+
933
+ return (
934
+ controls,
935
+ distribution_type,
936
+ run_explorer_button,
937
+ sample_size,
938
+ sim_count_slider,
939
+ )
940
+
941
+
942
+ if __name__ == "__main__":
943
+ app.run()
probability/19_maximum_likelihood_estimation.py ADDED
@@ -0,0 +1,1231 @@
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "marimo",
5
+ # "matplotlib==3.10.1",
6
+ # "scipy==1.15.2",
7
+ # "numpy==2.2.4",
8
+ # "polars==0.20.2",
9
+ # "plotly==5.18.0",
10
+ # ]
11
+ # ///
12
+
13
+ import marimo
14
+
15
+ __generated_with = "0.12.0"
16
+ app = marimo.App(width="medium", app_title="Maximum Likelihood Estimation")
17
+
18
+
19
+ @app.cell(hide_code=True)
20
+ def _(mo):
21
+ mo.md(
22
+ r"""
23
+ # Maximum Likelihood Estimation
24
+
25
+ _This notebook is a computational companion to ["Probability for Computer Scientists"](https://chrispiech.github.io/probabilityForComputerScientists/en/part5/mle/), by Stanford professor Chris Piech._
26
+
27
+ Maximum Likelihood Estimation (MLE) is a fundamental method in statistics for estimating parameters of a probability distribution. The central idea is elegantly simple: **choose the parameters that make the observed data most likely**.
28
+
29
+ In this notebook, we'll work through MLE, starting with the core concept of likelihood and how it differs from probability. We'll see how to formulate MLE problems mathematically and then solve them for several common distributions. Along the way, interactive visualizations help build intuition for these concepts. You'll see how MLE applies to real-world scenarios like linear regression, and hopefully gain a deeper appreciation for why this technique is so widely used in statistics and machine learning. Think of MLE as detective work: we have some evidence (our data) and we're trying to figure out the most plausible explanation (our parameters) for what we've observed.
30
+ """
31
+ )
32
+ return
33
+
34
+
35
+ @app.cell(hide_code=True)
36
+ def _(mo):
37
+ mo.md(
38
+ r"""
39
+ ## Likelihood: The Core Concept
40
+
41
+ Before diving into MLE, we need to understand what "likelihood" means in a statistical context.
42
+
43
+ ### Data and Parameters
44
+
45
+ Suppose we have collected some data $X_1, X_2, \ldots, X_n$ that are independent and identically distributed (IID). We assume these data points come from a specific type of distribution (like Normal, Bernoulli, etc.) with unknown parameters $\theta$.
46
+
47
+ ### What is Likelihood?
48
+
49
+ Likelihood measures how probable our observed data is, given specific values of the parameters $\theta$.
50
+
51
+ /// note
52
+ **Probability vs. Likelihood**
53
+
54
+ - **Probability**: Given parameters $\theta$, what's the chance of observing data $X$?
55
+ - **Likelihood**: Given observed data $X$, how likely are different parameter values $\theta$?
56
+ ///
57
+
58
+ To simplify notation, we'll use $f(X=x|\Theta=\theta)$ to represent either the PMF or PDF of our data, conditioned on the parameters.
59
+ """
60
+ )
61
+ return
62
+
63
+
64
+ @app.cell(hide_code=True)
65
+ def _(mo):
66
+ mo.md(
67
+ r"""
68
+ ### The Likelihood Function
69
+
70
+ Since we assume our data points are independent, the likelihood of all our data is the product of the likelihoods of each individual data point:
71
+
72
+ $$L(\theta) = \prod_{i=1}^n f(X_i = x_i|\Theta = \theta)$$
73
+
74
+ This function $L(\theta)$ gives us the likelihood of observing our entire dataset for different parameter values $\theta$.
75
+
76
+ /// tip
77
+ **Key Insight**: Different parameter values produce different likelihoods for the same data. Better parameter values will make the observed data more likely.
78
+ ///
79
+ """
80
+ )
81
+ return
82
+
83
+
84
+ @app.cell(hide_code=True)
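To make the likelihood function concrete, here is a small standalone sketch (hypothetical data, not part of the notebook) that evaluates $L(p)$ for a fixed set of Bernoulli observations at two candidate parameter values:

```python
import numpy as np

data = np.array([1, 0, 1, 1, 0, 1, 1, 1])  # 6 successes out of 8 trials

def bernoulli_likelihood(p, xs):
    # L(p) = prod_i p^{x_i} * (1 - p)^{1 - x_i}
    return np.prod(p**xs * (1 - p)**(1 - xs))

print(bernoulli_likelihood(0.50, data))  # ~0.0039
print(bernoulli_likelihood(0.75, data))  # ~0.0111: the data are more likely under p = 0.75
```

A higher likelihood means that parameter value explains the observed data better; MLE simply searches for the value where this function peaks.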
85
+ def _(mo):
86
+ mo.md(
87
+ r"""
88
+ ## Maximum Likelihood Estimation
89
+
90
+ The core idea of MLE is to find the parameter values $\hat{\theta}$ that maximize the likelihood function:
91
+
92
+ $$\hat{\theta} = \underset{\theta}{\operatorname{argmax}} \, L(\theta)$$
93
+
94
+ The notation $\hat{\theta}$ represents our best estimate of the true parameters based on the observed data.
95
+
96
+ ### Working with Log-Likelihood
97
+
98
+ In practice, we usually work with the **log-likelihood** instead of the likelihood directly. Since logarithm is a monotonically increasing function, the maximum of $L(\theta)$ occurs at the same value of $\theta$ as the maximum of $\log L(\theta)$.
99
+
100
+ Taking the logarithm transforms our product into a sum, which is much easier to work with:
101
+
102
+ $$LL(\theta) = \log L(\theta) = \log \prod_{i=1}^n f(X_i=x_i|\Theta = \theta) = \sum_{i=1}^n \log f(X_i = x_i|\Theta = \theta)$$
103
+
104
+ /// warning
105
+ Working with products of many small probabilities can lead to numerical underflow. Taking the logarithm converts these products to sums, which is numerically more stable.
106
+ ///
107
+ """
108
+ )
109
+ return
110
+
111
+
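The numerical-stability point is easy to see directly. A minimal sketch with illustrative values:

```python
import numpy as np

# 1000 IID observations, each contributing a density/mass value of about 1e-3
densities = np.full(1000, 1e-3)

likelihood = np.prod(densities)             # underflows to exactly 0.0 in float64
log_likelihood = np.sum(np.log(densities))  # ~ -6907.76, perfectly representable

print(likelihood, log_likelihood)
```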
112
+ @app.cell(hide_code=True)
113
+ def _(mo):
114
+ mo.md(
115
+ r"""
116
+ ### Finding the Maximum
117
+
118
+ To find the values of $\theta$ that maximize the log-likelihood, we typically:
119
+
120
+ 1. Take the derivative of $LL(\theta)$ with respect to each parameter
121
+ 2. Set each derivative equal to zero
122
+ 3. Solve for the parameters
123
+
124
+ Let's see this approach in action with some common distributions.
125
+ """
126
+ )
127
+ return
128
+
129
+
130
+ @app.cell(hide_code=True)
131
+ def _(mo):
132
+ mo.md(
133
+ r"""
134
+ ## MLE for Bernoulli Distribution
135
+
136
+ Let's start with a simple example: estimating the parameter $p$ of a Bernoulli distribution.
137
+
138
+ ### The Model
139
+
140
+ A Bernoulli distribution has a single parameter $p$ which represents the probability of success (getting a value of 1). Its probability mass function (PMF) can be written as:
141
+
142
+ $$f(x|p) = p^x(1-p)^{1-x}, \quad x \in \{0, 1\}$$
143
+
144
+ This elegant formula works because:
145
+
146
+ - When $x = 1$: $f(1|p) = p^1(1-p)^0 = p$
147
+ - When $x = 0$: $f(0|p) = p^0(1-p)^1 = 1-p$
148
+
149
+ ### Deriving the MLE
150
+
151
+ Given $n$ independent Bernoulli trials $X_1, X_2, \ldots, X_n$, we want to find the value of $p$ that maximizes the likelihood of our observed data.
152
+
153
+ Step 1: Write the likelihood function
154
+ $$L(p) = \prod_{i=1}^n p^{x_i}(1-p)^{1-x_i}$$
155
+
156
+ Step 2: Take the logarithm to get the log-likelihood
157
+ $$\begin{align*}
158
+ LL(p) &= \sum_{i=1}^n \log(p^{x_i}(1-p)^{1-x_i}) \\
159
+ &= \sum_{i=1}^n \left[x_i \log(p) + (1-x_i)\log(1-p)\right] \\
160
+ &= \left(\sum_{i=1}^n x_i\right) \log(p) + \left(n - \sum_{i=1}^n x_i\right) \log(1-p) \\
161
+ &= Y\log(p) + (n-Y)\log(1-p)
162
+ \end{align*}$$
163
+
164
+ where $Y = \sum_{i=1}^n x_i$ is the total number of successes.
165
+
166
+ Step 3: Find the value of $p$ that maximizes $LL(p)$ by setting the derivative to zero
167
+ $$\begin{align*}
168
+ \frac{d\,LL(p)}{dp} &= \frac{Y}{p} - \frac{n-Y}{1-p} = 0 \\
169
+ \frac{Y}{p} &= \frac{n-Y}{1-p} \\
170
+ Y(1-p) &= p(n-Y) \\
171
+ Y - Yp &= pn - pY \\
172
+ Y &= pn \\
173
+ \hat{p} &= \frac{Y}{n} = \frac{\sum_{i=1}^n x_i}{n}
174
+ \end{align*}$$
175
+
176
+ /// tip
177
+ The MLE for the parameter $p$ in a Bernoulli distribution is simply the **sample mean** - the proportion of successes in our data!
178
+ ///
179
+ """
180
+ )
181
+ return
182
+
183
+
184
+ @app.cell(hide_code=True)
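Before the interactive demo below, here is a quick standalone check (a sketch, assuming `scipy` is available as it is elsewhere in this notebook) that the closed-form estimate $\hat{p} = Y/n$ agrees with a direct numerical maximization of the log-likelihood:

```python
import numpy as np
from scipy import optimize

rng = np.random.default_rng(1)
xs = rng.binomial(1, 0.3, size=500)

# Closed-form MLE: the sample mean
p_closed_form = xs.mean()

# Numerical maximization of LL(p) = Y log(p) + (n - Y) log(1 - p)
def neg_ll(p):
    y = xs.sum()
    return -(y * np.log(p) + (len(xs) - y) * np.log(1 - p))

p_numeric = optimize.minimize_scalar(neg_ll, bounds=(1e-6, 1 - 1e-6), method="bounded").x

print(p_closed_form, p_numeric)  # the two agree to several decimal places
```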
185
+ def _(controls):
186
+ controls.center()
187
+ return
188
+
189
+
190
+ @app.cell(hide_code=True)
191
+ def _(generate_button, mo, np, plt, sample_size_slider, true_p_slider):
192
+ # generate bernoulli samples when button is clicked
193
+ bernoulli_button_value = generate_button.value
194
+
195
+ # get parameter values
196
+ bernoulli_true_p = true_p_slider.value
197
+ bernoulli_n = sample_size_slider.value
198
+
199
+ # generate data
200
+ bernoulli_data = np.random.binomial(1, bernoulli_true_p, size=bernoulli_n)
201
+ bernoulli_Y = np.sum(bernoulli_data)
202
+ bernoulli_p_hat = bernoulli_Y / bernoulli_n
203
+
204
+ # create visualization
205
+ bernoulli_fig, (bernoulli_ax1, bernoulli_ax2) = plt.subplots(1, 2, figsize=(12, 5))
206
+
207
+ # plot data histogram
208
+ bernoulli_ax1.hist(bernoulli_data, bins=[-0.5, 0.5, 1.5], rwidth=0.8, color='lightblue')
209
+ bernoulli_ax1.set_xticks([0, 1])
210
+ bernoulli_ax1.set_xticklabels(['Failure (0)', 'Success (1)'])
211
+ bernoulli_ax1.set_title(f'Bernoulli Data: {bernoulli_n} samples')
212
+ bernoulli_ax1.set_ylabel('Count')
213
+ bernoulli_y_counts = [bernoulli_n - bernoulli_Y, bernoulli_Y]
214
+ for bernoulli_idx, bernoulli_count in enumerate(bernoulli_y_counts):
215
+ bernoulli_ax1.text(bernoulli_idx, bernoulli_count/2, f"{bernoulli_count}",
216
+ ha='center', va='center',
217
+ color='white' if bernoulli_idx == 0 else 'black',
218
+ fontweight='bold')
219
+
220
+ # calculate log-likelihood function
221
+ bernoulli_p_values = np.linspace(0.01, 0.99, 100)
222
+ bernoulli_ll_values = np.zeros_like(bernoulli_p_values)
223
+
224
+ for bernoulli_i, bernoulli_p in enumerate(bernoulli_p_values):
225
+ bernoulli_ll_values[bernoulli_i] = bernoulli_Y * np.log(bernoulli_p) + (bernoulli_n - bernoulli_Y) * np.log(1 - bernoulli_p)
226
+
227
+ # plot log-likelihood
228
+ bernoulli_ax2.plot(bernoulli_p_values, bernoulli_ll_values, 'b-', linewidth=2)
229
+ bernoulli_ax2.axvline(x=bernoulli_p_hat, color='r', linestyle='--', label=f'MLE: $\\hat{{p}} = {bernoulli_p_hat:.3f}$')
230
+ bernoulli_ax2.axvline(x=bernoulli_true_p, color='g', linestyle='--', label=f'True: $p = {bernoulli_true_p:.3f}$')
231
+ bernoulli_ax2.set_xlabel('$p$ (probability of success)')
232
+ bernoulli_ax2.set_ylabel('Log-Likelihood')
233
+ bernoulli_ax2.set_title('Log-Likelihood Function')
234
+ bernoulli_ax2.legend()
235
+
236
+ plt.tight_layout()
237
+ plt.gca()
238
+
239
+ # Create markdown to explain the results
240
+ bernoulli_explanation = mo.md(
241
+ f"""
242
+ ### Bernoulli MLE Results
243
+
244
+ **True parameter**: $p = {bernoulli_true_p:.3f}$
245
+ **Sample statistics**: {bernoulli_Y} successes out of {bernoulli_n} trials
246
+ **MLE estimate**: $\\hat{{p}} = \\frac{{{bernoulli_Y}}}{{{bernoulli_n}}} = {bernoulli_p_hat:.3f}$
247
+
248
+ The plot on the right shows the log-likelihood function $LL(p) = Y\\log(p) + (n-Y)\\log(1-p)$.
249
+ The red dashed line marks the maximum likelihood estimate $\\hat{{p}}$, and the green dashed line
250
+ shows the true parameter value.
251
+
252
+ /// note
253
+ Try increasing the sample size to see how the MLE estimate gets closer to the true parameter value!
254
+ ///
255
+ """
256
+ )
257
+
258
+ # Display plot and explanation together
259
+ mo.vstack([
260
+ bernoulli_fig,
261
+ bernoulli_explanation
262
+ ])
263
+ return (
264
+ bernoulli_Y,
265
+ bernoulli_ax1,
266
+ bernoulli_ax2,
267
+ bernoulli_button_value,
268
+ bernoulli_count,
269
+ bernoulli_data,
270
+ bernoulli_explanation,
271
+ bernoulli_fig,
272
+ bernoulli_i,
273
+ bernoulli_idx,
274
+ bernoulli_ll_values,
275
+ bernoulli_n,
276
+ bernoulli_p,
277
+ bernoulli_p_hat,
278
+ bernoulli_p_values,
279
+ bernoulli_true_p,
280
+ bernoulli_y_counts,
281
+ )
282
+
283
+
284
+ @app.cell(hide_code=True)
285
+ def _(mo):
286
+ mo.md(
287
+ r"""
288
+ ## MLE for Normal Distribution
289
+
290
+ Next, let's look at a more complex example: estimating the parameters $\mu$ and $\sigma^2$ of a Normal distribution.
291
+
292
+ ### The Model
293
+
294
+ A Normal (Gaussian) distribution has two parameters:
295
+ - $\mu$: the mean
296
+ - $\sigma^2$: the variance
297
+
298
+ Its probability density function (PDF) is:
299
+
300
+ $$f(x|\mu, \sigma^2) = \frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(-\frac{(x - \mu)^2}{2\sigma^2}\right)$$
301
+
302
+ ### Deriving the MLE
303
+
304
+ Given $n$ independent samples $X_1, X_2, \ldots, X_n$ from a Normal distribution, we want to find the values of $\mu$ and $\sigma^2$ that maximize the likelihood of our observed data.
305
+
306
+ Step 1: Write the likelihood function
307
+ $$L(\mu, \sigma^2) = \prod_{i=1}^n \frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(-\frac{(x_i - \mu)^2}{2\sigma^2}\right)$$
308
+
309
+ Step 2: Take the logarithm to get the log-likelihood
310
+ $$\begin{align*}
311
+ LL(\mu, \sigma^2) &= \log\prod_{i=1}^n \frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(-\frac{(x_i - \mu)^2}{2\sigma^2}\right) \\
312
+ &= \sum_{i=1}^n \log\left[\frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(-\frac{(x_i - \mu)^2}{2\sigma^2}\right)\right] \\
313
+ &= \sum_{i=1}^n \left[-\frac{1}{2}\log(2\pi\sigma^2) - \frac{(x_i - \mu)^2}{2\sigma^2}\right] \\
314
+ &= -\frac{n}{2}\log(2\pi\sigma^2) - \frac{1}{2\sigma^2}\sum_{i=1}^n (x_i - \mu)^2
315
+ \end{align*}$$
316
+
317
+ Step 3: Find the values of $\mu$ and $\sigma^2$ that maximize $LL(\mu, \sigma^2)$ by setting the partial derivatives to zero.
318
+
319
+ For $\mu$:
320
+ $$\begin{align*}
321
+ \frac{\partial LL(\mu, \sigma^2)}{\partial \mu} &= \frac{1}{\sigma^2}\sum_{i=1}^n (x_i - \mu) = 0 \\
322
+ \sum_{i=1}^n (x_i - \mu) &= 0 \\
323
+ \sum_{i=1}^n x_i &= n\mu \\
324
+ \hat{\mu} &= \frac{1}{n}\sum_{i=1}^n x_i
325
+ \end{align*}$$
326
+
327
+ For $\sigma^2$:
328
+ $$\begin{align*}
329
+ \frac{\partial LL(\mu, \sigma^2)}{\partial \sigma^2} &= -\frac{n}{2\sigma^2} + \frac{1}{2(\sigma^2)^2}\sum_{i=1}^n (x_i - \mu)^2 = 0 \\
330
+ \frac{n}{2\sigma^2} &= \frac{1}{2(\sigma^2)^2}\sum_{i=1}^n (x_i - \mu)^2 \\
331
+ n\sigma^2 &= \sum_{i=1}^n (x_i - \mu)^2 \\
332
+ \hat{\sigma}^2 &= \frac{1}{n}\sum_{i=1}^n (x_i - \hat{\mu})^2
333
+ \end{align*}$$
334
+
335
+ /// tip
336
+ The MLE for a Normal distribution gives us:
337
+
338
+ - $\hat{\mu}$ = sample mean
339
+ - $\hat{\sigma}^2$ = sample variance (using $n$ in the denominator, not $n-1$)
340
+ ///
341
+ """
342
+ )
343
+ return
344
+
345
+
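One practical detail worth checking in code: NumPy's `np.var` already uses the MLE convention (dividing by $n$) unless `ddof=1` is passed. A short sketch:

```python
import numpy as np

rng = np.random.default_rng(2)
xs = rng.normal(loc=5.0, scale=2.0, size=1_000)

mu_hat = xs.mean()                        # MLE of mu: the sample mean
sigma2_hat = np.mean((xs - mu_hat) ** 2)  # MLE of sigma^2: divides by n

print(mu_hat, sigma2_hat)
print(np.isclose(sigma2_hat, xs.var()))        # True: np.var defaults to ddof=0
print(np.isclose(sigma2_hat, xs.var(ddof=1)))  # False: the unbiased estimator divides by n - 1
```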
346
+ @app.cell(hide_code=True)
347
+ def _(normal_controls):
348
+ normal_controls.center()
349
+ return
350
+
351
+
352
+ @app.cell(hide_code=True)
353
+ def _(
354
+ mo,
355
+ normal_generate_button,
356
+ normal_sample_size_slider,
357
+ np,
358
+ plt,
359
+ true_mu_slider,
360
+ true_sigma_slider,
361
+ ):
362
+ # generate normal samples when button is clicked
363
+ normal_button_value = normal_generate_button.value
364
+
365
+ # get parameter values
366
+ normal_true_mu = true_mu_slider.value
367
+ normal_true_sigma = true_sigma_slider.value
368
+ normal_true_var = normal_true_sigma**2
369
+ normal_n = normal_sample_size_slider.value
370
+
371
+ # generate random data
372
+ normal_data = np.random.normal(normal_true_mu, normal_true_sigma, size=normal_n)
373
+
374
+ # calculate mle estimates
375
+ normal_mu_hat = np.mean(normal_data)
376
+ normal_sigma2_hat = np.mean((normal_data - normal_mu_hat)**2) # mle variance using n
377
+ normal_sigma_hat = np.sqrt(normal_sigma2_hat)
378
+
379
+ # create visualization
380
+ normal_fig, (normal_ax1, normal_ax2) = plt.subplots(1, 2, figsize=(12, 5))
381
+
382
+ # plot histogram and density curves
383
+ normal_bins = np.linspace(min(normal_data) - 1, max(normal_data) + 1, 30)
384
+ normal_ax1.hist(normal_data, bins=normal_bins, density=True, alpha=0.6, color='lightblue', label='Data Histogram')
385
+
386
+ # plot range for density curves
387
+ normal_x = np.linspace(min(normal_data) - 2*normal_true_sigma, max(normal_data) + 2*normal_true_sigma, 1000)
388
+
389
+ # plot true and mle densities
390
+ normal_true_pdf = (1/(normal_true_sigma * np.sqrt(2*np.pi))) * np.exp(-0.5 * ((normal_x - normal_true_mu)/normal_true_sigma)**2)
391
+ normal_ax1.plot(normal_x, normal_true_pdf, 'g-', linewidth=2, label=f'True: N({normal_true_mu:.2f}, {normal_true_var:.2f})')
392
+
393
+ normal_mle_pdf = (1/(normal_sigma_hat * np.sqrt(2*np.pi))) * np.exp(-0.5 * ((normal_x - normal_mu_hat)/normal_sigma_hat)**2)
394
+ normal_ax1.plot(normal_x, normal_mle_pdf, 'r--', linewidth=2, label=f'MLE: N({normal_mu_hat:.2f}, {normal_sigma2_hat:.2f})')
395
+
396
+ normal_ax1.set_xlabel('x')
397
+ normal_ax1.set_ylabel('Density')
398
+ normal_ax1.set_title(f'Normal Distribution: {normal_n} samples')
399
+ normal_ax1.legend()
400
+
401
+ # create contour plot of log-likelihood
402
+ normal_mu_range = np.linspace(normal_mu_hat - 2, normal_mu_hat + 2, 100)
403
+ normal_sigma_range = np.linspace(max(0.1, normal_sigma_hat - 1), normal_sigma_hat + 1, 100)
404
+
405
+ normal_mu_grid, normal_sigma_grid = np.meshgrid(normal_mu_range, normal_sigma_range)
406
+ normal_ll_grid = np.zeros_like(normal_mu_grid)
407
+
408
+ # calculate log-likelihood for each grid point
409
+ for normal_i in range(normal_mu_grid.shape[0]):
410
+ for normal_j in range(normal_mu_grid.shape[1]):
411
+ normal_mu = normal_mu_grid[normal_i, normal_j]
412
+ normal_sigma = normal_sigma_grid[normal_i, normal_j]
413
+ normal_ll = -normal_n/2 * np.log(2*np.pi*normal_sigma**2) - np.sum((normal_data - normal_mu)**2)/(2*normal_sigma**2)
414
+ normal_ll_grid[normal_i, normal_j] = normal_ll
415
+
416
+ # plot log-likelihood contour
417
+ normal_contour = normal_ax2.contourf(normal_mu_grid, normal_sigma_grid, normal_ll_grid, levels=50, cmap='viridis')
418
+ normal_ax2.set_xlabel('μ (mean)')
419
+ normal_ax2.set_ylabel('σ (standard deviation)')
420
+ normal_ax2.set_title('Log-Likelihood Contour')
421
+
422
+ # mark mle and true params
423
+ normal_ax2.plot(normal_mu_hat, normal_sigma_hat, 'rx', markersize=10, label='MLE Estimate')
424
+ normal_ax2.plot(normal_true_mu, normal_true_sigma, 'g*', markersize=10, label='True Parameters')
425
+ normal_ax2.legend()
426
+
427
+ plt.colorbar(normal_contour, ax=normal_ax2, label='Log-Likelihood')
428
+ plt.tight_layout()
429
+ plt.gca()
430
+
431
+ # relevant markdown for the results
432
+ normal_explanation = mo.md(
433
+ f"""
434
+ ### Normal MLE Results
435
+
436
+ **True parameters**: $\\mu = {normal_true_mu:.3f}$, $\\sigma^2 = {normal_true_var:.3f}$
437
+ **MLE estimates**: $\\hat{{\\mu}} = {normal_mu_hat:.3f}$, $\\hat{{\\sigma}}^2 = {normal_sigma2_hat:.3f}$
438
+
439
+ The left plot shows the data histogram with the true Normal distribution (green) and the MLE-estimated distribution (red dashed).
440
+
441
+ The right plot shows the log-likelihood function as a contour map in the $(\\mu, \\sigma)$ parameter space. The maximum likelihood estimates are marked with a red X, while the true parameters are marked with a green star.
442
+
443
+ /// note
444
+ Notice how the log-likelihood contour is more stretched along the σ axis than the μ axis. This indicates that we typically estimate the mean with greater precision than the standard deviation.
445
+ ///
446
+
447
+ /// tip
448
+ Increase the sample size to see how the MLE estimates converge to the true parameter values!
449
+ ///
450
+ """
451
+ )
452
+
453
+ # plot and explanation together
454
+ mo.vstack([
455
+ normal_fig,
456
+ normal_explanation
457
+ ])
458
+ return (
459
+ normal_ax1,
460
+ normal_ax2,
461
+ normal_bins,
462
+ normal_button_value,
463
+ normal_contour,
464
+ normal_data,
465
+ normal_explanation,
466
+ normal_fig,
467
+ normal_i,
468
+ normal_j,
469
+ normal_ll,
470
+ normal_ll_grid,
471
+ normal_mle_pdf,
472
+ normal_mu,
473
+ normal_mu_grid,
474
+ normal_mu_hat,
475
+ normal_mu_range,
476
+ normal_n,
477
+ normal_sigma,
478
+ normal_sigma2_hat,
479
+ normal_sigma_grid,
480
+ normal_sigma_hat,
481
+ normal_sigma_range,
482
+ normal_true_mu,
483
+ normal_true_pdf,
484
+ normal_true_sigma,
485
+ normal_true_var,
486
+ normal_x,
487
+ )
488
+
489
+
490
+ @app.cell(hide_code=True)
491
+ def _(mo):
492
+ mo.md(
493
+ r"""
494
+ ## MLE for Linear Regression
495
+
496
+ Now let's look at a more practical example: using MLE to derive linear regression.
497
+
498
+ ### The Model
499
+
500
+ Consider a model where:
501
+ - We have pairs of observations $(X_1, Y_1), (X_2, Y_2), \ldots, (X_n, Y_n)$
502
+ - The relationship between $X$ and $Y$ follows: $Y = \theta X + Z$
503
+ - $Z \sim N(0, \sigma^2)$ is random noise
504
+ - Our goal is to estimate the parameter $\theta$
505
+
506
+ This means that for a given $X_i$, the conditional distribution of $Y_i$ is:
507
+
508
+ $$Y_i | X_i \sim N(\theta X_i, \sigma^2)$$
509
+
510
+ ### Deriving the MLE
511
+
512
+ Step 1: Write the likelihood function for each data point $(X_i, Y_i)$
513
+ $$f(Y_i | X_i, \theta) = \frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(-\frac{(Y_i - \theta X_i)^2}{2\sigma^2}\right)$$
514
+
515
+ Step 2: Write the likelihood for all data
516
+ $$\begin{align*}
517
+ L(\theta) &= \prod_{i=1}^n f(Y_i, X_i | \theta) \\
518
+ &= \prod_{i=1}^n f(Y_i | X_i, \theta) \cdot f(X_i)
519
+ \end{align*}$$
520
+
521
+ Since $f(X_i)$ doesn't depend on $\theta$, we can simplify:
522
+ $$L(\theta) = \prod_{i=1}^n \frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(-\frac{(Y_i - \theta X_i)^2}{2\sigma^2}\right) \cdot f(X_i)$$
523
+
524
+ Step 3: Take the logarithm to get the log-likelihood
525
+ $$\begin{align*}
526
+ LL(\theta) &= \log \prod_{i=1}^n \frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(-\frac{(Y_i - \theta X_i)^2}{2\sigma^2}\right) \cdot f(X_i) \\
527
+ &= \sum_{i=1}^n \log\left[\frac{1}{\sqrt{2\pi\sigma^2}} \exp\left(-\frac{(Y_i - \theta X_i)^2}{2\sigma^2}\right)\right] + \sum_{i=1}^n \log f(X_i) \\
528
+ &= -\frac{n}{2} \log(2\pi\sigma^2) - \frac{1}{2\sigma^2} \sum_{i=1}^n (Y_i - \theta X_i)^2 + \sum_{i=1}^n \log f(X_i)
529
+ \end{align*}$$
530
+
531
+ Step 4: Since we only care about maximizing with respect to $\theta$, we can drop terms that don't contain $\theta$:
532
+ $$\hat{\theta} = \underset{\theta}{\operatorname{argmax}} \left[ -\frac{1}{2\sigma^2} \sum_{i=1}^n (Y_i - \theta X_i)^2 \right]$$
533
+
534
+ This is equivalent to:
535
+ $$\hat{\theta} = \underset{\theta}{\operatorname{argmin}} \sum_{i=1}^n (Y_i - \theta X_i)^2$$
536
+
537
+ Step 5: Find the value of $\theta$ that minimizes the sum of squared errors by setting the derivative to zero:
538
+ $$\begin{align*}
539
+ \frac{d}{d\theta} \sum_{i=1}^n (Y_i - \theta X_i)^2 &= 0 \\
540
+ \sum_{i=1}^n -2X_i(Y_i - \theta X_i) &= 0 \\
541
+ \sum_{i=1}^n \left(X_i Y_i - \theta X_i^2\right) &= 0 \\
542
+ \sum_{i=1}^n X_i Y_i &= \theta \sum_{i=1}^n X_i^2 \\
543
+ \hat{\theta} &= \frac{\sum_{i=1}^n X_i Y_i}{\sum_{i=1}^n X_i^2}
544
+ \end{align*}$$
545
+
546
+ /// tip
547
+ **Key Insight**: MLE for this simple linear model gives us the least squares estimator! This is an important connection between MLE and regression.
548
+ ///
549
+ """
550
+ )
551
+ return
552
+
553
+
554
+ @app.cell(hide_code=True)
555
+ def _(linear_controls):
556
+ linear_controls.center()
557
+ return
558
+
559
+
560
+ @app.cell(hide_code=True)
561
+ def _(
562
+ linear_generate_button,
563
+ linear_sample_size_slider,
564
+ mo,
565
+ noise_sigma_slider,
566
+ np,
567
+ plt,
568
+ true_theta_slider,
569
+ ):
570
+ # linear model data calc when button is clicked
571
+ linear_button_value = linear_generate_button.value
572
+
573
+ # get parameter values
574
+ linear_true_theta = true_theta_slider.value
575
+ linear_noise_sigma = noise_sigma_slider.value
576
+ linear_n = linear_sample_size_slider.value
577
+
578
+ # generate x data (uniformly between -3 and 3)
579
+ linear_X = np.random.uniform(-3, 3, size=linear_n)
580
+
581
+ # generate y data according to the model y = θx + z
582
+ linear_Z = np.random.normal(0, linear_noise_sigma, size=linear_n)
583
+ linear_Y = linear_true_theta * linear_X + linear_Z
584
+
585
+ # calculate mle estimate
586
+ linear_theta_hat = np.sum(linear_X * linear_Y) / np.sum(linear_X**2)
587
+
588
+ # calculate sse for different theta values
589
+ linear_theta_range = np.linspace(linear_true_theta - 1.5, linear_true_theta + 1.5, 100)
590
+ linear_sse_values = np.zeros_like(linear_theta_range)
591
+
592
+ for linear_i, linear_theta in enumerate(linear_theta_range):
593
+ linear_y_pred = linear_theta * linear_X
594
+ linear_sse_values[linear_i] = np.sum((linear_Y - linear_y_pred)**2)
595
+
596
+ # convert sse to log-likelihood (ignoring constant terms)
597
+ linear_ll_values = -linear_sse_values / (2 * linear_noise_sigma**2)
598
+
599
+ # create visualization
600
+ linear_fig, (linear_ax1, linear_ax2) = plt.subplots(1, 2, figsize=(12, 5))
601
+
602
+ # plot scatter plot with regression lines
603
+ linear_ax1.scatter(linear_X, linear_Y, color='blue', alpha=0.6, label='Data points')
604
+
605
+ # plot range for regression lines
606
+ linear_x_line = np.linspace(-3, 3, 100)
607
+
608
+ # plot true and mle regression lines
609
+ linear_ax1.plot(linear_x_line, linear_true_theta * linear_x_line, 'g-', linewidth=2, label=f'True: Y = {linear_true_theta:.2f}X')
610
+ linear_ax1.plot(linear_x_line, linear_theta_hat * linear_x_line, 'r--', linewidth=2, label=f'MLE: Y = {linear_theta_hat:.2f}X')
611
+
612
+ linear_ax1.set_xlabel('X')
613
+ linear_ax1.set_ylabel('Y')
614
+ linear_ax1.set_title(f'Linear Regression: {linear_n} data points')
615
+ linear_ax1.grid(True, alpha=0.3)
616
+ linear_ax1.legend()
617
+
618
+ # plot log-likelihood function
619
+ linear_ax2.plot(linear_theta_range, linear_ll_values, 'b-', linewidth=2)
620
+ linear_ax2.axvline(x=linear_theta_hat, color='r', linestyle='--', label=f'MLE: θ = {linear_theta_hat:.3f}')
621
+ linear_ax2.axvline(x=linear_true_theta, color='g', linestyle='--', label=f'True: θ = {linear_true_theta:.3f}')
622
+ linear_ax2.set_xlabel('θ (slope parameter)')
623
+ linear_ax2.set_ylabel('Log-Likelihood')
624
+ linear_ax2.set_title('Log-Likelihood Function')
625
+ linear_ax2.grid(True, alpha=0.3)
626
+ linear_ax2.legend()
627
+
628
+ plt.tight_layout()
629
+ plt.gca()
630
+
631
+ # relevant markdown to explain results
632
+ linear_explanation = mo.md(
633
+ f"""
634
+ ### Linear Regression MLE Results
635
+
636
+ **True parameter**: $\\theta = {linear_true_theta:.3f}$
637
+ **MLE estimate**: $\\hat{{\\theta}} = {linear_theta_hat:.3f}$
638
+
639
+ The left plot shows the scatter plot of data points with the true regression line (green) and the MLE-estimated regression line (red dashed).
640
+
641
+ The right plot shows the log-likelihood function for different values of $\\theta$. The maximum likelihood estimate is marked with a red dashed line, and the true parameter is marked with a green dashed line.
642
+
643
+ /// note
644
+ The MLE estimate $\\hat{{\\theta}} = \\frac{{\\sum_{{i=1}}^n X_i Y_i}}{{\\sum_{{i=1}}^n X_i^2}}$ minimizes the sum of squared errors between the predicted and actual Y values.
645
+ ///
646
+
647
+ /// tip
648
+ Try increasing the noise level to see how it affects the precision of the estimate!
649
+ ///
650
+ """
651
+ )
652
+
653
+ # show plot and explanation
654
+ mo.vstack([
655
+ linear_fig,
656
+ linear_explanation
657
+ ])
658
+ return (
659
+ linear_X,
660
+ linear_Y,
661
+ linear_Z,
662
+ linear_ax1,
663
+ linear_ax2,
664
+ linear_button_value,
665
+ linear_explanation,
666
+ linear_fig,
667
+ linear_i,
668
+ linear_ll_values,
669
+ linear_n,
670
+ linear_noise_sigma,
671
+ linear_sse_values,
672
+ linear_theta,
673
+ linear_theta_hat,
674
+ linear_theta_range,
675
+ linear_true_theta,
676
+ linear_x_line,
677
+ linear_y_pred,
678
+ )
679
+
680
+
681
+ @app.cell(hide_code=True)
682
+ def _(mo):
683
+ mo.md(
684
+ r"""
685
+ ## Interactive Concept: Density/Mass Functions vs. Likelihood
686
+
687
+ To better understand the distinction between likelihood and density/mass functions, let's create an interactive visualization. This concept is crucial for understanding why MLE works.
688
+ """
689
+ )
690
+ return
691
+
692
+
693
+ @app.cell(hide_code=True)
694
+ def _(concept_controls):
695
+ concept_controls.center()
696
+ return
697
+
698
+
699
+ @app.cell(hide_code=True)
700
+ def _(concept_dist_type, mo, np, perspective_selector, plt, stats):
701
+ # current distribution type
702
+ concept_dist_type_value = concept_dist_type.value
703
+
704
+ # view mode from dropdown
705
+ concept_view_mode = "likelihood" if perspective_selector.value == "Likelihood Perspective" else "probability"
706
+
707
+ # visualization based on distribution type
708
+ concept_fig, concept_ax = plt.subplots(figsize=(10, 6))
709
+
710
+ if concept_dist_type_value == "Normal":
711
+ if concept_view_mode == "probability":
712
+ # density function perspective: fixed params, varying data
713
+ concept_mu = 0 # fixed parameter
714
+ concept_sigma = 1 # fixed parameter
715
+
716
+ # generate x values for the pdf
717
+ concept_x = np.linspace(-4, 4, 1000)
718
+
719
+ # plot pdf
720
+ concept_pdf = stats.norm.pdf(concept_x, concept_mu, concept_sigma)
721
+ concept_ax.plot(concept_x, concept_pdf, 'b-', linewidth=2, label='PDF: N(0, 1)')
722
+
723
+ # highlight specific data values
724
+ concept_data_points = [-2, -1, 0, 1, 2]
725
+ concept_colors = ['#FF9999', '#FFCC99', '#99FF99', '#99CCFF', '#CC99FF']
726
+
727
+ for concept_i, concept_data in enumerate(concept_data_points):
728
+ concept_prob = stats.norm.pdf(concept_data, concept_mu, concept_sigma)
729
+ concept_ax.plot([concept_data, concept_data], [0, concept_prob], concept_colors[concept_i], linewidth=2)
730
+ concept_ax.scatter(concept_data, concept_prob, color=concept_colors[concept_i], s=50,
731
+ label=f'PDF at x={concept_data}: {concept_prob:.3f}')
732
+
733
+ concept_ax.set_xlabel('Data (x)')
734
+ concept_ax.set_ylabel('Probability Density')
735
+ concept_ax.set_title('Density Function Perspective: Fixed Parameters (μ=0, σ=1), Different Data Points')
736
+
737
+ else: # likelihood perspective
738
+ # likelihood perspective: fixed data, varying parameters
739
+ concept_data_point = 1.5 # fixed observed data
740
+
741
+ # different possible parameter values (means)
742
+ concept_mus = [-1, 0, 1, 2, 3]
743
+ concept_sigma = 1
744
+
745
+ # generate x values for multiple pdfs
746
+ concept_x = np.linspace(-4, 6, 1000)
747
+
748
+ concept_colors = ['#FF9999', '#FFCC99', '#99FF99', '#99CCFF', '#CC99FF']
749
+
750
+ for concept_i, concept_mu in enumerate(concept_mus):
751
+ concept_pdf = stats.norm.pdf(concept_x, concept_mu, concept_sigma)
752
+ concept_ax.plot(concept_x, concept_pdf, color=concept_colors[concept_i], linewidth=2, alpha=0.7,
753
+ label=f'N({concept_mu}, 1)')
754
+
755
+ # mark the likelihood of the data point for this param
756
+ concept_likelihood = stats.norm.pdf(concept_data_point, concept_mu, concept_sigma)
757
+ concept_ax.plot([concept_data_point, concept_data_point], [0, concept_likelihood], concept_colors[concept_i], linewidth=2)
758
+ concept_ax.scatter(concept_data_point, concept_likelihood, color=concept_colors[concept_i], s=50,
759
+ label=f'L(μ={concept_mu}|X=1.5) = {concept_likelihood:.3f}')
760
+
761
+ # add vertical line at the observed data point
762
+ concept_ax.axvline(x=concept_data_point, color='black', linestyle='--',
763
+ label=f'Observed data: X=1.5')
764
+
765
+ concept_ax.set_xlabel('Data (x)')
766
+ concept_ax.set_ylabel('Probability Density / Likelihood')
767
+ concept_ax.set_title('Likelihood Perspective: Fixed Data Point (X=1.5), Different Parameter Values')
768
+
769
+ elif concept_dist_type_value == "Bernoulli":
770
+ if concept_view_mode == "probability":
771
+ # probability perspective: fixed parameter, two possible data values
772
+ concept_p = 0.3 # fixed parameter
773
+
774
+ # bar chart for p(x=0) and p(x=1)
775
+ concept_ax.bar([0, 1], [1-concept_p, concept_p], width=0.4, color=['#99CCFF', '#FF9999'],
776
+ alpha=0.7, label=f'PMF: Bernoulli({concept_p})')
777
+
778
+ # text showing probabilities
779
+ concept_ax.text(0, (1-concept_p)/2, f'P(X=0|p={concept_p}) = {1-concept_p:.3f}', ha='center', va='center', fontweight='bold')
780
+ concept_ax.text(1, concept_p/2, f'P(X=1|p={concept_p}) = {concept_p:.3f}', ha='center', va='center', fontweight='bold')
781
+
782
+ concept_ax.set_xlabel('Data (x)')
783
+ concept_ax.set_ylabel('Probability')
784
+ concept_ax.set_xticks([0, 1])
785
+ concept_ax.set_xticklabels(['X=0', 'X=1'])
786
+ concept_ax.set_ylim(0, 1)
787
+ concept_ax.set_title('Probability Perspective: Fixed Parameter (p=0.3), Different Data Values')
788
+
789
+ else: # likelihood perspective
790
+ # likelihood perspective: fixed data, varying parameter
791
+ concept_data_point = 1 # fixed observed data (success)
792
+
793
+ # different possible parameter values
794
+ concept_p_values = np.linspace(0.01, 0.99, 100)
795
+
796
+ # calculate likelihood for each p value
797
+ if concept_data_point == 1:
798
+ # for x=1, likelihood is p
799
+ concept_likelihood = concept_p_values
800
+ concept_ax.plot(concept_p_values, concept_likelihood, 'b-', linewidth=2,
801
+ label=f'L(p|X=1) = p')
802
+
803
+ # highlight specific values
804
+ concept_highlight_ps = [0.2, 0.5, 0.8]
805
+ concept_colors = ['#FF9999', '#99FF99', '#99CCFF']
806
+
807
+ for concept_i, concept_p in enumerate(concept_highlight_ps):
808
+ concept_ax.plot([concept_p, concept_p], [0, concept_p], concept_colors[concept_i], linewidth=2)
809
+ concept_ax.scatter(concept_p, concept_p, color=concept_colors[concept_i], s=50,
810
+ label=f'L(p={concept_p}|X=1) = {concept_p:.3f}')
811
+
812
+ concept_ax.set_title('Likelihood Perspective: Fixed Data Point (X=1), Different Parameter Values')
813
+
814
+ else: # x=0
815
+ # for x = 0, likelihood is (1-p)
816
+ concept_likelihood = 1 - concept_p_values
817
+ concept_ax.plot(concept_p_values, concept_likelihood, 'r-', linewidth=2,
818
+ label=f'L(p|X=0) = (1-p)')
819
+
820
+ # highlight some specific values
821
+ concept_highlight_ps = [0.2, 0.5, 0.8]
822
+ concept_colors = ['#FF9999', '#99FF99', '#99CCFF']
823
+
824
+ for concept_i, concept_p in enumerate(concept_highlight_ps):
825
+ concept_ax.plot([concept_p, concept_p], [0, 1-concept_p], concept_colors[concept_i], linewidth=2)
826
+ concept_ax.scatter(concept_p, 1-concept_p, color=concept_colors[concept_i], s=50,
827
+ label=f'L(p={concept_p}|X=0) = {1-concept_p:.3f}')
828
+
829
+ concept_ax.set_title('Likelihood Perspective: Fixed Data Point (X=0), Different Parameter Values')
830
+
831
+ concept_ax.set_xlabel('Parameter (p)')
832
+ concept_ax.set_ylabel('Likelihood')
833
+ concept_ax.set_xlim(0, 1)
834
+ concept_ax.set_ylim(0, 1)
835
+
836
+ elif concept_dist_type_value == "Poisson":
837
+ if concept_view_mode == "probability":
838
+ # probability perspective: fixed parameter, different data values
839
+ concept_lam = 2.5 # fixed parameter
840
+
841
+ # pmf for different x values plot
842
+ concept_x_values = np.arange(0, 10)
843
+ concept_pmf_values = stats.poisson.pmf(concept_x_values, concept_lam)
844
+
845
+ concept_ax.bar(concept_x_values, concept_pmf_values, width=0.4, color='#99CCFF',
846
+ alpha=0.7, label=f'PMF: Poisson({concept_lam})')
847
+
848
+ # highlight a few specific values
849
+ concept_highlight_xs = [1, 2, 3, 4]
850
+ concept_colors = ['#FF9999', '#99FF99', '#FFCC99', '#CC99FF']
851
+
852
+ for concept_i, concept_x in enumerate(concept_highlight_xs):
853
+ concept_prob = stats.poisson.pmf(concept_x, concept_lam)
854
+ concept_ax.scatter(concept_x, concept_prob, color=concept_colors[concept_i], s=50,
855
+ label=f'P(X={concept_x}|λ={concept_lam}) = {concept_prob:.3f}')
856
+
857
+ concept_ax.set_xlabel('Data (x)')
858
+ concept_ax.set_ylabel('Probability')
859
+ concept_ax.set_xticks(concept_x_values)
860
+ concept_ax.set_title('Probability Perspective: Fixed Parameter (λ=2.5), Different Data Values')
861
+
862
+ else: # likelihood perspective
863
+ # likelihood perspective: fixed data, varying parameter
864
+ concept_data_point = 4 # fixed observed data
865
+
866
+ # different possible param values
867
+ concept_lambda_values = np.linspace(0.1, 8, 100)
868
+
869
+ # calc likelihood for each lambda value
870
+ concept_likelihood = stats.poisson.pmf(concept_data_point, concept_lambda_values)
871
+
872
+ concept_ax.plot(concept_lambda_values, concept_likelihood, 'b-', linewidth=2,
873
+ label=f'L(λ|X={concept_data_point})')
874
+
875
+ # highlight some specific values
876
+ concept_highlight_lambdas = [1, 2, 4, 6]
877
+ concept_colors = ['#FF9999', '#99FF99', '#99CCFF', '#FFCC99']
878
+
879
+ for concept_i, concept_lam in enumerate(concept_highlight_lambdas):
880
+ concept_like_val = stats.poisson.pmf(concept_data_point, concept_lam)
881
+ concept_ax.plot([concept_lam, concept_lam], [0, concept_like_val], concept_colors[concept_i], linewidth=2)
882
+ concept_ax.scatter(concept_lam, concept_like_val, color=concept_colors[concept_i], s=50,
883
+ label=f'L(λ={concept_lam}|X={concept_data_point}) = {concept_like_val:.3f}')
884
+
885
+ concept_ax.set_xlabel('Parameter (λ)')
886
+ concept_ax.set_ylabel('Likelihood')
887
+ concept_ax.set_title(f'Likelihood Perspective: Fixed Data Point (X={concept_data_point}), Different Parameter Values')
888
+
889
+ concept_ax.legend(loc='best', fontsize=9)
890
+ concept_ax.grid(True, alpha=0.3)
891
+ plt.tight_layout()
892
+ plt.gca()
893
+
894
+ # relevant explanation based on view mode
895
+ if concept_view_mode == "probability":
896
+ concept_explanation = mo.md(
897
+ f"""
898
+ ### Density/Mass Function Perspective
899
+
900
+ In the **density/mass function perspective**, the parameters of the distribution are **fixed and known**, and we evaluate the function at **different possible data values**.
901
+
902
+ For the {concept_dist_type_value} distribution, we've fixed the parameter{'s' if concept_dist_type_value == 'Normal' else ''} and shown the {'density' if concept_dist_type_value == 'Normal' else 'probability mass'} function evaluated at different data points.
903
+
904
+ This is the typical perspective when:
905
+
906
+ - We know the true parameters of a distribution
907
+ - We want to evaluate the {'density' if concept_dist_type_value == 'Normal' else 'probability mass'} at different observations
908
+ - We make predictions based on our model
909
+
910
+ **Mathematical notation**: $f(x | \\theta)$
911
+ """
912
+ )
913
+ else: # likelihood perspective
914
+ concept_explanation = mo.md(
915
+ f"""
916
+ ### Likelihood Perspective
917
+
918
+ In the **likelihood perspective**, the observed data is **fixed and known**, and we calculate how likely different parameter values are to have generated that data.
919
+
920
+ For the {concept_dist_type_value} distribution, we've fixed the observed data point{'s' if concept_dist_type_value == 'Normal' else ''} and shown the likelihood of different parameter values.
921
+
922
+ This is the perspective used in MLE:
923
+
924
+ - We have observed data
925
+ - We don't know the true parameters
926
+ - We want to find parameters that best explain our observations
927
+
928
+ **Mathematical notation**: $L(\\theta | X = x)$
929
+
930
+ /// tip
931
+ The value of $\\theta$ that maximizes this likelihood function is the MLE estimate $\\hat{{\\theta}}$!
932
+ ///
933
+ """
934
+ )
935
+
936
+ # Display plot and explanation together
937
+ mo.vstack([
938
+ concept_fig,
939
+ concept_explanation
940
+ ])
941
+ return (
942
+ concept_ax,
943
+ concept_colors,
944
+ concept_data,
945
+ concept_data_point,
946
+ concept_data_points,
947
+ concept_dist_type_value,
948
+ concept_explanation,
949
+ concept_fig,
950
+ concept_highlight_lambdas,
951
+ concept_highlight_ps,
952
+ concept_highlight_xs,
953
+ concept_i,
954
+ concept_lam,
955
+ concept_lambda_values,
956
+ concept_like_val,
957
+ concept_likelihood,
958
+ concept_mu,
959
+ concept_mus,
960
+ concept_p,
961
+ concept_p_values,
962
+ concept_pdf,
963
+ concept_pmf_values,
964
+ concept_prob,
965
+ concept_sigma,
966
+ concept_view_mode,
967
+ concept_x,
968
+ concept_x_values,
969
+ )
970
+
971
+
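The cell above toggles between two readings of the same `stats.poisson.pmf` call. As a quick standalone sketch of that contrast (assuming only scipy, independent of the notebook's UI state):

```python
from scipy import stats

# Probability perspective: parameter fixed (lambda = 2.5), vary the data value
lam_fixed = 2.5
for x in [0, 2, 4, 6]:
    print(f"P(X={x} | lambda={lam_fixed}) = {stats.poisson.pmf(x, lam_fixed):.3f}")

# Likelihood perspective: data fixed (x = 4), vary the parameter
x_obs = 4
for lam in [1.0, 2.0, 4.0, 6.0]:
    print(f"L(lambda={lam} | X={x_obs}) = {stats.poisson.pmf(x_obs, lam):.3f}")
```

Both loops call the same function; only what is held fixed changes, which is exactly the distinction the two dropdown views illustrate.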
972
+ @app.cell(hide_code=True)
973
+ def _(mo):
974
+ mo.md(
975
+ r"""
976
+ ## 🤔 Test Your Understanding
977
+
978
+ Which of the following statements about Maximum Likelihood Estimation are correct? Click each statement to check your answer.
979
+
980
+ /// details | Probability and likelihood have different interpretations: probability measures the chance of data given parameters, while likelihood measures how likely parameters are given data.
981
+ ✅ **Correct!**
982
+
983
+ Probability measures how likely it is to observe particular data when we know the parameters. Likelihood measures how likely particular parameter values are, given observed data.
984
+
985
+ Mathematically, probability is $P(X=x|\theta)$ while likelihood is $L(\theta|X=x)$.
986
+ ///
987
+
988
+ /// details | We use log-likelihood instead of likelihood because it's mathematically simpler and numerically more stable.
989
+ ✅ **Correct!**
990
+
991
+ We work with log-likelihood for several reasons:
992
+ 1. It converts products into sums, which are easier to work with mathematically
993
+ 2. It avoids numerical underflow when multiplying many small probabilities
994
+ 3. The logarithm is a monotonically increasing function, so the maximum of the likelihood occurs at the same parameter values as the maximum of the log-likelihood
995
+ ///
996
+
997
+ /// details | For a Bernoulli distribution, the MLE for parameter p is the sample mean of the observations.
998
+ ✅ **Correct!**
999
+
1000
+ For a Bernoulli distribution with parameter $p$, given $n$ independent samples $X_1, X_2, \ldots, X_n$, the MLE estimator is:
1001
+
1002
+ $$\hat{p} = \frac{\sum_{i=1}^n X_i}{n}$$
1003
+
1004
+ This is simply the sample mean, or the proportion of successes (1s) in the data.
1005
+ ///
1006
+
1007
+ /// details | For a Normal distribution, MLE gives unbiased estimates for both mean and variance parameters.
1008
+ ❌ **Incorrect.**
1009
+
1010
+ While the MLE for the mean ($\hat{\mu} = \frac{1}{n}\sum_{i=1}^n X_i$) is unbiased, the MLE for variance:
1011
+
1012
+ $$\hat{\sigma}^2 = \frac{1}{n}\sum_{i=1}^n (X_i - \hat{\mu})^2$$
1013
+
1014
+ is a biased estimator. It uses $n$ in the denominator rather than the $n-1$ used in the unbiased estimator.
1015
+ ///
1016
+
1017
+ /// details | MLE estimators are always unbiased regardless of the distribution.
1018
+ ❌ **Incorrect.**
1019
+
1020
+ MLE estimators are not always unbiased, though they are often asymptotically unbiased (meaning the bias approaches zero as the sample size increases).
1021
+
1022
+ A notable example is the MLE estimator for the variance of a Normal distribution:
1023
+ $$\hat{\sigma}^2 = \frac{1}{n}\sum_{i=1}^n (X_i - \hat{\mu})^2$$
1024
+
1025
+ This estimator is biased, which is why we often use the unbiased estimator:
1026
+ $$s^2 = \frac{1}{n-1}\sum_{i=1}^n (X_i - \hat{\mu})^2$$
1027
+
1028
+ Despite occasional bias, MLE estimators have many desirable properties, including consistency and asymptotic efficiency.
1029
+ ///
1030
+ """
1031
+ )
1032
+ return
1033
+
1034
+
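As a sanity check on the quiz statements above, here is a small numeric sketch (assuming only numpy and scipy; the exact values depend on the random seed):

```python
import numpy as np
from scipy import stats

rng = np.random.default_rng(42)

# Bernoulli: the MLE of p is the sample mean (proportion of ones)
bern = rng.binomial(1, 0.3, size=1_000)
print("p_hat =", bern.mean())  # close to the true p = 0.3

# Normal: the MLE of the variance divides by n and is biased downward on average
sample = rng.normal(loc=0.0, scale=2.0, size=20)
print("MLE variance (ddof=0):", sample.var(ddof=0))
print("Unbiased variance (ddof=1):", sample.var(ddof=1))

# Why log-likelihood: a product of many densities underflows, the log-sum does not
big_sample = rng.normal(size=1_000)
dens = stats.norm.pdf(big_sample)
print("Product of densities:", np.prod(dens))       # underflows to 0.0
print("Sum of log-densities:", np.log(dens).sum())  # finite, same maximizer
```

The `ddof=0` / `ddof=1` pair mirrors the $1/n$ versus $1/(n-1)$ estimators discussed above.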
1035
+ @app.cell(hide_code=True)
1036
+ def _(mo):
1037
+ mo.md(
1038
+ r"""
1039
+ ## Summary
1040
+
1041
+ Maximum Likelihood Estimation really is one of those elegant ideas that sits at the core of modern statistics. When you get down to it, MLE is just about finding the most plausible explanation for the data we've observed. It's like being a detective - you have some clues (your data), and you're trying to piece together the most likely story (your parameters) that explains them.
1042
+
1043
+ We've seen how this works with different distributions. For the Bernoulli, it simply gives us the sample proportion. For the Normal, it gives us the sample mean and a slightly biased estimate of variance. And for linear regression, it provides a mathematical justification for the least squares method that everyone learns in basic stats classes.
1044
+
1045
+ What makes MLE so useful in practice is that it tends to give us estimates with good properties. As you collect more data, the estimates generally get closer to the true values (consistency) and do so efficiently. That's why MLE is everywhere in statistics and machine learning - from simple regression models to complex neural networks.
1046
+
1047
+ The most important takeaway? Next time you're fitting a model to data, remember that you're not just following a recipe - you're finding the parameters that make your observed data most likely to have occurred. That's the essence of statistical inference.
1048
+ """
1049
+ )
1050
+ return
1051
+
1052
+
1053
+ @app.cell(hide_code=True)
1054
+ def _(mo):
1055
+ mo.md(
1056
+ r"""
1057
+ ## Further Reading
1058
+
1059
+ If you're curious to dive deeper into this topic, check out "Statistical Inference" by Casella and Berger - it's the classic text that many statisticians learned from. For a more machine learning angle, Bishop's "Pattern Recognition and Machine Learning" shows how MLE connects to more advanced topics like EM algorithms and Bayesian methods.
1060
+
1061
+ Beyond the basics we've covered, you might explore Bayesian estimation (which incorporates prior knowledge), Fisher Information (which tells us how precisely we can estimate parameters), or the EM algorithm (for when we have missing data or latent variables). Each of these builds on the foundation of likelihood that we've established here.
1062
+ """
1063
+ )
1064
+ return
1065
+
1066
+
1067
+ @app.cell(hide_code=True)
1068
+ def _(mo):
1069
+ mo.md(r"""## Appendix (helper functions and imports)""")
1070
+ return
1071
+
1072
+
1073
+ @app.cell
1074
+ def _():
1075
+ import marimo as mo
1076
+ return (mo,)
1077
+
1078
+
1079
+ @app.cell
1080
+ def _():
1081
+ import numpy as np
1082
+ import matplotlib.pyplot as plt
1083
+ from scipy import stats
1084
+ import plotly.graph_objects as go
1085
+ import polars as pl
1086
+ from matplotlib import cm
1087
+
1088
+ # Set a consistent random seed for reproducibility
1089
+ np.random.seed(42)
1090
+
1091
+ # Set a nice style for matplotlib
1092
+ plt.style.use('seaborn-v0_8-darkgrid')
1093
+ return cm, go, np, pl, plt, stats
1094
+
1095
+
1096
+ @app.cell(hide_code=True)
1097
+ def _(mo):
1098
+ # Create interactive elements
1099
+ true_p_slider = mo.ui.slider(
1100
+ start=0.01,
1101
+ stop=0.99,
1102
+ value=0.3,
1103
+ step=0.01,
1104
+ label="True probability (p)"
1105
+ )
1106
+
1107
+ sample_size_slider = mo.ui.slider(
1108
+ start=10,
1109
+ stop=1000,
1110
+ value=100,
1111
+ step=10,
1112
+ label="Sample size (n)"
1113
+ )
1114
+
1115
+ generate_button = mo.ui.button(label="Generate New Sample", kind="success")
1116
+
1117
+ controls = mo.vstack([
1118
+ mo.vstack([true_p_slider, sample_size_slider]),
1119
+ generate_button
1120
+ ], justify="space-between")
1121
+ return controls, generate_button, sample_size_slider, true_p_slider
1122
+
1123
+
1124
+ @app.cell(hide_code=True)
1125
+ def _(mo):
1126
+ # Create interactive elements for Normal distribution
1127
+ true_mu_slider = mo.ui.slider(
1128
+ start=-5,
1129
+ stop=5,
1130
+ value=0,
1131
+ step=0.1,
1132
+ label="True mean (μ)"
1133
+ )
1134
+
1135
+ true_sigma_slider = mo.ui.slider(
1136
+ start=0.5,
1137
+ stop=3,
1138
+ value=1,
1139
+ step=0.1,
1140
+ label="True standard deviation (σ)"
1141
+ )
1142
+
1143
+ normal_sample_size_slider = mo.ui.slider(
1144
+ start=10,
1145
+ stop=500,
1146
+ value=50,
1147
+ step=10,
1148
+ label="Sample size (n)"
1149
+ )
1150
+
1151
+ normal_generate_button = mo.ui.button(label="Generate New Sample", kind="warn")
1152
+
1153
+ normal_controls = mo.hstack([
1154
+ mo.vstack([true_mu_slider, true_sigma_slider, normal_sample_size_slider]),
1155
+ normal_generate_button
1156
+ ], justify="space-between")
1157
+ return (
1158
+ normal_controls,
1159
+ normal_generate_button,
1160
+ normal_sample_size_slider,
1161
+ true_mu_slider,
1162
+ true_sigma_slider,
1163
+ )
1164
+
1165
+
1166
+ @app.cell(hide_code=True)
1167
+ def _(mo):
1168
+ # Create interactive elements for linear regression
1169
+ true_theta_slider = mo.ui.slider(
1170
+ start=-2,
1171
+ stop=2,
1172
+ value=0.5,
1173
+ step=0.1,
1174
+ label="True slope (θ)"
1175
+ )
1176
+
1177
+ noise_sigma_slider = mo.ui.slider(
1178
+ start=0.1,
1179
+ stop=2,
1180
+ value=0.5,
1181
+ step=0.1,
1182
+ label="Noise level (σ)"
1183
+ )
1184
+
1185
+ linear_sample_size_slider = mo.ui.slider(
1186
+ start=10,
1187
+ stop=200,
1188
+ value=50,
1189
+ step=10,
1190
+ label="Sample size (n)"
1191
+ )
1192
+
1193
+ linear_generate_button = mo.ui.button(label="Generate New Sample", kind="warn")
1194
+
1195
+ linear_controls = mo.hstack([
1196
+ mo.vstack([true_theta_slider, noise_sigma_slider, linear_sample_size_slider]),
1197
+ linear_generate_button
1198
+ ], justify="space-between")
1199
+ return (
1200
+ linear_controls,
1201
+ linear_generate_button,
1202
+ linear_sample_size_slider,
1203
+ noise_sigma_slider,
1204
+ true_theta_slider,
1205
+ )
1206
+
1207
+
1208
+ @app.cell(hide_code=True)
1209
+ def _(mo):
1210
+ # Interactive elements for likelihood vs probability demo
1211
+ concept_dist_type = mo.ui.dropdown(
1212
+ options=["Normal", "Bernoulli", "Poisson"],
1213
+ value="Normal",
1214
+ label="Distribution"
1215
+ )
1216
+
1217
+ # Replace buttons with a simple dropdown selector
1218
+ perspective_selector = mo.ui.dropdown(
1219
+ options=["Probability Perspective", "Likelihood Perspective"],
1220
+ value="Probability Perspective",
1221
+ label="View"
1222
+ )
1223
+
1224
+ concept_controls = mo.vstack([
1225
+ mo.hstack([concept_dist_type, perspective_selector])
1226
+ ])
1227
+ return concept_controls, concept_dist_type, perspective_selector
1228
+
1229
+
1230
+ if __name__ == "__main__":
1231
+ app.run()
python/006_dictionaries.py CHANGED
@@ -196,13 +196,13 @@ def _():
196
 
197
  @app.cell
198
  def _(mo, nested_data):
199
- mo.md(f"Alice's age: {nested_data["users"]["alice"]["age"]}")
200
  return
201
 
202
 
203
  @app.cell
204
  def _(mo, nested_data):
205
- mo.md(f"Bob's interests: {nested_data["users"]["bob"]["interests"]}")
206
  return
207
 
208
 
 
196
 
197
  @app.cell
198
  def _(mo, nested_data):
199
+ mo.md(f"Alice's age: {nested_data['users']['alice']['age']}")
200
  return
201
 
202
 
203
  @app.cell
204
  def _(mo, nested_data):
205
+ mo.md(f"Bob's interests: {nested_data['users']['bob']['interests']}")
206
  return
207
 
208
 
scripts/build.py ADDED
@@ -0,0 +1,281 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import os
4
+ import subprocess
5
+ import argparse
6
+ import json
7
+ import datetime
8
+ import markdown
9
+ from datetime import date
10
+ from pathlib import Path
11
+ from typing import Dict, List, Any, Optional, Tuple
12
+
13
+ from jinja2 import Environment, FileSystemLoader
14
+
15
+
16
+ def export_html_wasm(notebook_path: str, output_dir: str, as_app: bool = False) -> bool:
17
+ """Export a single marimo notebook to HTML format.
18
+
19
+ Args:
20
+ notebook_path: Path to the notebook to export
21
+ output_dir: Directory to write the output HTML files
22
+ as_app: If True, export as app instead of notebook
23
+
24
+ Returns:
25
+ bool: True if export succeeded, False otherwise
26
+ """
27
+ # Create directory for the output
28
+ os.makedirs(output_dir, exist_ok=True)
29
+
30
+ # Determine the output path (preserving directory structure)
31
+ rel_path = os.path.basename(os.path.dirname(notebook_path))
32
+ if rel_path != os.path.dirname(notebook_path):
33
+ # Create subdirectory if needed
34
+ os.makedirs(os.path.join(output_dir, rel_path), exist_ok=True)
35
+
36
+ # Determine output filename (same as input but with .html extension)
37
+ output_filename = os.path.basename(notebook_path).replace(".py", ".html")
38
+ output_path = os.path.join(output_dir, rel_path, output_filename)
39
+
40
+ # Run marimo export command
41
+ mode = "--mode app" if as_app else "--mode edit"
42
+ cmd = f"marimo export html-wasm {mode} {notebook_path} -o {output_path} --sandbox"
43
+ print(f"Exporting {notebook_path} to {rel_path}/{output_filename} as {'app' if as_app else 'notebook'}")
44
+ print(f"Running command: {cmd}")
45
+
46
+ try:
47
+ result = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
48
+ print(f"Successfully exported {notebook_path} to {output_path}")
49
+ return True
50
+ except subprocess.CalledProcessError as e:
51
+ print(f"Error exporting {notebook_path}: {e}")
52
+ print(f"Command output: {e.output}")
53
+ return False
54
+
55
+
56
+ def get_course_metadata(course_dir: Path) -> Dict[str, Any]:
57
+ """Extract metadata from a course directory.
58
+
59
+ Reads the README.md file to extract title and description.
60
+
61
+ Args:
62
+ course_dir: Path to the course directory
63
+
64
+ Returns:
65
+ Dict: Dictionary containing course metadata (title, description)
66
+ """
67
+ readme_path = course_dir / "README.md"
68
+ title = course_dir.name.replace("_", " ").title()
69
+ description = ""
70
+ description_html = ""
71
+
72
+ if readme_path.exists():
73
+ with open(readme_path, "r", encoding="utf-8") as f:
74
+ content = f.read()
75
+
76
+ # Try to extract title from first heading
77
+ title_match = content.split("\n")[0]
78
+ if title_match.startswith("# "):
79
+ title = title_match[2:].strip()
80
+
81
+ # Extract description from content after first heading
82
+ desc_content = "\n".join(content.split("\n")[1:]).strip()
83
+ if desc_content:
84
+ # Take first paragraph as description, preserve markdown formatting
85
+ description = desc_content.split("\n\n")[0].strip()
86
+ # Convert markdown to HTML
87
+ description_html = markdown.markdown(description)
88
+
89
+ return {
90
+ "title": title,
91
+ "description": description,
92
+ "description_html": description_html
93
+ }
94
+
95
+
96
+ def organize_notebooks_by_course(all_notebooks: List[str]) -> Dict[str, Dict[str, Any]]:
97
+ """Organize notebooks by course.
98
+
99
+ Args:
100
+ all_notebooks: List of paths to notebooks
101
+
102
+ Returns:
103
+ Dict: A dictionary where keys are course directories and values are
104
+ metadata about the course and its notebooks
105
+ """
106
+ courses = {}
107
+
108
+ for notebook_path in sorted(all_notebooks):
109
+ # Parse the path to determine course
110
+ # The first directory in the path is the course
111
+ path_parts = Path(notebook_path).parts
112
+
113
+ if len(path_parts) < 2:
114
+ print(f"Skipping notebook with invalid path: {notebook_path}")
115
+ continue
116
+
117
+ course_id = path_parts[0]
118
+
119
+ # If this is a new course, initialize it
120
+ if course_id not in courses:
121
+ course_metadata = get_course_metadata(Path(course_id))
122
+
123
+ courses[course_id] = {
124
+ "id": course_id,
125
+ "title": course_metadata["title"],
126
+ "description": course_metadata["description"],
127
+ "description_html": course_metadata["description_html"],
128
+ "notebooks": []
129
+ }
130
+
131
+ # Extract the notebook number and name from the filename
132
+ filename = Path(notebook_path).name
133
+ basename = filename.replace(".py", "")
134
+
135
+ # Extract notebook metadata
136
+ notebook_title = basename.replace("_", " ").title()
137
+
138
+ # Try to extract a sequence number from the start of the filename
139
+ # Match patterns like: 01_xxx, 1_xxx, etc.
140
+ import re
141
+ number_match = re.match(r'^(\d+)(?:[_-]|$)', basename)
142
+ notebook_number = number_match.group(1) if number_match else None
143
+
144
+ # If we found a number, remove it from the title
145
+ if number_match:
146
+ notebook_title = re.sub(r'^\d+\s*[_-]?\s*', '', notebook_title)
147
+
148
+ # Calculate the HTML output path (for linking)
149
+ html_path = f"{course_id}/{filename.replace('.py', '.html')}"
150
+
151
+ # Add the notebook to the course
152
+ courses[course_id]["notebooks"].append({
153
+ "path": notebook_path,
154
+ "html_path": html_path,
155
+ "title": notebook_title,
156
+ "display_name": notebook_title,
157
+ "original_number": notebook_number
158
+ })
159
+
160
+ # Sort notebooks by number if available, otherwise by title
161
+ for course_id, course_data in courses.items():
162
+ # Sort the notebooks list by number and title
163
+ course_data["notebooks"] = sorted(
164
+ course_data["notebooks"],
165
+ key=lambda x: (
166
+ int(x["original_number"]) if x["original_number"] is not None else float('inf'),
167
+ x["title"]
168
+ )
169
+ )
170
+
171
+ return courses
172
+
173
+
174
+ def generate_clean_tailwind_landing_page(courses: Dict[str, Dict[str, Any]], output_dir: str) -> None:
175
+ """Generate a clean tailwindcss landing page with green accents.
176
+
177
+ This generates a modern, minimal landing page for marimo notebooks using tailwindcss.
178
+ The page is designed with clean aesthetics and green color accents using Jinja2 templates.
179
+
180
+ Args:
181
+ courses: Dictionary of courses metadata
182
+ output_dir: Directory to write the output index.html file
183
+ """
184
+ print("Generating clean tailwindcss landing page")
185
+
186
+ index_path = os.path.join(output_dir, "index.html")
187
+ os.makedirs(output_dir, exist_ok=True)
188
+
189
+ # Load Jinja2 template
190
+ current_dir = Path(__file__).parent
191
+ templates_dir = current_dir / "templates"
192
+ env = Environment(loader=FileSystemLoader(templates_dir))
193
+ template = env.get_template('index.html')
194
+
195
+ try:
196
+ with open(index_path, "w", encoding="utf-8") as f:
197
+ # Render the template with the provided data
198
+ rendered_html = template.render(
199
+ courses=courses,
200
+ current_year=datetime.date.today().year
201
+ )
202
+ f.write(rendered_html)
203
+
204
+ print(f"Successfully generated clean tailwindcss landing page at {index_path}")
205
+
206
+ except IOError as e:
207
+ print(f"Error generating clean tailwindcss landing page: {e}")
208
+
209
+
210
+ def main() -> None:
211
+ parser = argparse.ArgumentParser(description="Build marimo notebooks")
212
+ parser.add_argument(
213
+ "--output-dir", default="_site", help="Output directory for built files"
214
+ )
215
+ parser.add_argument(
216
+ "--course-dirs", nargs="+", default=None,
217
+ help="Specific course directories to build (default: all directories with .py files)"
218
+ )
219
+ args = parser.parse_args()
220
+
221
+ # Find all course directories (directories containing .py files)
222
+ all_notebooks: List[str] = []
223
+
224
+ # Directories to exclude from course detection
225
+ excluded_dirs = ["scripts", "env", "__pycache__", ".git", ".github", "assets"]
226
+
227
+ if args.course_dirs:
228
+ course_dirs = args.course_dirs
229
+ else:
230
+ # Automatically detect course directories (any directory with .py files)
231
+ course_dirs = []
232
+ for item in os.listdir("."):
233
+ if (os.path.isdir(item) and
234
+ not item.startswith(".") and
235
+ not item.startswith("_") and
236
+ item not in excluded_dirs):
237
+ # Check if directory contains .py files
238
+ if list(Path(item).glob("*.py")):
239
+ course_dirs.append(item)
240
+
241
+ print(f"Found course directories: {', '.join(course_dirs)}")
242
+
243
+ for directory in course_dirs:
244
+ dir_path = Path(directory)
245
+ if not dir_path.exists():
246
+ print(f"Warning: Directory not found: {dir_path}")
247
+ continue
248
+
249
+ notebooks = [str(path) for path in dir_path.rglob("*.py")
250
+ if not path.name.startswith("_") and "/__pycache__/" not in str(path)]
251
+ all_notebooks.extend(notebooks)
252
+
253
+ if not all_notebooks:
254
+ print("No notebooks found!")
255
+ return
256
+
257
+ # Export notebooks sequentially
258
+ successful_notebooks = []
259
+ for nb in all_notebooks:
260
+ # Determine if notebook should be exported as app or notebook
261
+ # For now, export all as notebooks
262
+ if export_html_wasm(nb, args.output_dir, as_app=False):
263
+ successful_notebooks.append(nb)
264
+
265
+ # Organize notebooks by course (only include successfully exported notebooks)
266
+ courses = organize_notebooks_by_course(successful_notebooks)
267
+
268
+ # Generate landing page using Tailwind CSS
269
+ generate_clean_tailwind_landing_page(courses, args.output_dir)
270
+
271
+ # Save course data as JSON for potential use by other tools
272
+ courses_json_path = os.path.join(args.output_dir, "courses.json")
273
+ with open(courses_json_path, "w", encoding="utf-8") as f:
274
+ json.dump(courses, f, indent=2)
275
+
276
+ print(f"Build complete! Site generated in {args.output_dir}")
277
+ print(f"Successfully exported {len(successful_notebooks)} out of {len(all_notebooks)} notebooks")
278
+
279
+
280
+ if __name__ == "__main__":
281
+ main()
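For reference, the number/title handling in `organize_notebooks_by_course` maps notebook filenames as follows; this is a standalone sketch of the same two regexes, and the filenames are only examples:

```python
import re

for basename in ["006_dictionaries", "01_intro", "advanced_topics"]:
    number_match = re.match(r'^(\d+)(?:[_-]|$)', basename)
    number = number_match.group(1) if number_match else None
    title = basename.replace("_", " ").title()
    if number_match:
        title = re.sub(r'^\d+\s*[_-]?\s*', '', title)
    print(f"{basename!r}: number={number!r}, title={title!r}")

# '006_dictionaries': number='006', title='Dictionaries'
# '01_intro': number='01', title='Intro'
# 'advanced_topics': number=None, title='Advanced Topics'
```

Notebooks with a leading number sort by that number; the rest fall back to alphabetical order by title.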
scripts/preview.py ADDED
@@ -0,0 +1,76 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import os
4
+ import subprocess
5
+ import argparse
6
+ import webbrowser
7
+ import time
8
+ import sys
9
+ from pathlib import Path
10
+
11
+ def main():
12
+ parser = argparse.ArgumentParser(description="Build and preview marimo notebooks site")
13
+ parser.add_argument(
14
+ "--port", default=8000, type=int, help="Port to run the server on"
15
+ )
16
+ parser.add_argument(
17
+ "--no-build", action="store_true", help="Skip building the site (just serve existing files)"
18
+ )
19
+ parser.add_argument(
20
+ "--output-dir", default="_site", help="Output directory for built files"
21
+ )
22
+ args = parser.parse_args()
23
+
24
+ # Store the current directory
25
+ original_dir = os.getcwd()
26
+
27
+ try:
28
+ # Build the site if not skipped
29
+ if not args.no_build:
30
+ print("Building site...")
31
+ build_script = Path("scripts/build.py")
32
+ if not build_script.exists():
33
+ print(f"Error: Build script not found at {build_script}")
34
+ return 1
35
+
36
+ result = subprocess.run(
37
+ [sys.executable, str(build_script), "--output-dir", args.output_dir],
38
+ check=False
39
+ )
40
+ if result.returncode != 0:
41
+ print("Warning: Build process completed with errors.")
42
+
43
+ # Check if the output directory exists
44
+ output_dir = Path(args.output_dir)
45
+ if not output_dir.exists():
46
+ print(f"Error: Output directory '{args.output_dir}' does not exist.")
47
+ return 1
48
+
49
+ # Change to the output directory
50
+ os.chdir(args.output_dir)
51
+
52
+ # Open the browser
53
+ url = f"http://localhost:{args.port}"
54
+ print(f"Opening {url} in your browser...")
55
+ webbrowser.open(url)
56
+
57
+ # Start the server
58
+ print(f"Starting server on port {args.port}...")
59
+ print("Press Ctrl+C to stop the server")
60
+
61
+ # Use the appropriate Python executable
62
+ subprocess.run([sys.executable, "-m", "http.server", str(args.port)])
63
+
64
+ return 0
65
+ except KeyboardInterrupt:
66
+ print("\nServer stopped.")
67
+ return 0
68
+ except Exception as e:
69
+ print(f"Error: {e}")
70
+ return 1
71
+ finally:
72
+ # Always return to the original directory
73
+ os.chdir(original_dir)
74
+
75
+ if __name__ == "__main__":
76
+ sys.exit(main())
scripts/templates/index.html ADDED
@@ -0,0 +1,174 @@
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Marimo Learn - Interactive Python Notebooks</title>
7
+ <meta name="description" content="Learn Python, data science, and machine learning with interactive marimo notebooks">
8
+ <link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/tailwind.min.css" rel="stylesheet">
9
+ <style>
10
+ :root {
11
+ --primary-green: #10B981;
12
+ --dark-green: #047857;
13
+ --light-green: #D1FAE5;
14
+ }
15
+ .bg-primary { background-color: var(--primary-green); }
16
+ .text-primary { color: var(--primary-green); }
17
+ .border-primary { border-color: var(--primary-green); }
18
+ .bg-light { background-color: var(--light-green); }
+ .hover\:bg-dark-green:hover { background-color: var(--dark-green); }
+ .hover\:text-dark-green:hover { color: var(--dark-green); }
19
+ .hover-grow { transition: transform 0.2s ease; }
20
+ .hover-grow:hover { transform: scale(1.02); }
21
+ .card-shadow { box-shadow: 0 4px 6px rgba(0, 0, 0, 0.05), 0 1px 3px rgba(0, 0, 0, 0.1); }
22
+ </style>
23
+ </head>
24
+ <body class="bg-gray-50 text-gray-800 font-sans">
25
+ <!-- Hero Section -->
26
+ <header class="bg-white">
27
+ <div class="container mx-auto px-4 py-12 max-w-6xl">
28
+ <div class="flex flex-col md:flex-row items-center justify-between">
29
+ <div class="md:w-1/2 mb-8 md:mb-0 md:pr-12">
30
+ <h1 class="text-4xl md:text-5xl font-bold mb-4">Interactive Python Learning with <span class="text-primary">marimo</span></h1>
31
+ <p class="text-lg text-gray-600 mb-6">Explore our collection of interactive notebooks for Python, data science, and machine learning.</p>
32
+ <div class="flex flex-wrap gap-4">
33
+ <a href="#courses" class="bg-primary hover:bg-dark-green text-white font-medium py-2 px-6 rounded-md transition duration-300">Explore Courses</a>
34
+ <a href="https://github.com/marimo-team/learn" target="_blank" class="bg-white border border-gray-300 hover:border-primary text-gray-700 font-medium py-2 px-6 rounded-md transition duration-300">View on GitHub</a>
35
+ </div>
36
+ </div>
37
+ <div class="md:w-1/2">
38
+ <div class="bg-light p-1 rounded-lg">
39
+ <img src="https://github.com/marimo-team/learn/blob/main/assets/marimo-learn.png?raw=true" alt="Marimo Logo" class="w-64 h-64 mx-auto object-contain">
40
+ </div>
41
+ </div>
42
+ </div>
43
+ </div>
44
+ </header>
45
+
46
+ <!-- Features Section -->
47
+ <section class="py-16 bg-gray-50">
48
+ <div class="container mx-auto px-4 max-w-6xl">
49
+ <h2 class="text-3xl font-bold text-center mb-12">Why Learn with <span class="text-primary">Marimo</span>?</h2>
50
+ <div class="grid md:grid-cols-3 gap-8">
51
+ <div class="bg-white p-6 rounded-lg card-shadow">
52
+ <div class="w-12 h-12 bg-light rounded-full flex items-center justify-center mb-4">
53
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6 text-primary" fill="none" viewBox="0 0 24 24" stroke="currentColor">
54
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M13 10V3L4 14h7v7l9-11h-7z" />
55
+ </svg>
56
+ </div>
57
+ <h3 class="text-xl font-semibold mb-2">Interactive Learning</h3>
58
+ <p class="text-gray-600">Learn by doing with interactive notebooks that run directly in your browser.</p>
59
+ </div>
60
+ <div class="bg-white p-6 rounded-lg card-shadow">
61
+ <div class="w-12 h-12 bg-light rounded-full flex items-center justify-center mb-4">
62
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6 text-primary" fill="none" viewBox="0 0 24 24" stroke="currentColor">
63
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M19.428 15.428a2 2 0 00-1.022-.547l-2.387-.477a6 6 0 00-3.86.517l-.318.158a6 6 0 01-3.86.517L6.05 15.21a2 2 0 00-1.806.547M8 4h8l-1 1v5.172a2 2 0 00.586 1.414l5 5c1.26 1.26.367 3.414-1.415 3.414H4.828c-1.782 0-2.674-2.154-1.414-3.414l5-5A2 2 0 009 10.172V5L8 4z" />
64
+ </svg>
65
+ </div>
66
+ <h3 class="text-xl font-semibold mb-2">Practical Examples</h3>
67
+ <p class="text-gray-600">Real-world examples and applications to reinforce your understanding.</p>
68
+ </div>
69
+ <div class="bg-white p-6 rounded-lg card-shadow">
70
+ <div class="w-12 h-12 bg-light rounded-full flex items-center justify-center mb-4">
71
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6 text-primary" fill="none" viewBox="0 0 24 24" stroke="currentColor">
72
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M12 6.253v13m0-13C10.832 5.477 9.246 5 7.5 5S4.168 5.477 3 6.253v13C4.168 18.477 5.754 18 7.5 18s3.332.477 4.5 1.253m0-13C13.168 5.477 14.754 5 16.5 5c1.747 0 3.332.477 4.5 1.253v13C19.832 18.477 18.247 18 16.5 18c-1.746 0-3.332.477-4.5 1.253" />
73
+ </svg>
74
+ </div>
75
+ <h3 class="text-xl font-semibold mb-2">Comprehensive Curriculum</h3>
76
+ <p class="text-gray-600">From Python basics to advanced machine learning concepts.</p>
77
+ </div>
78
+ </div>
79
+ </div>
80
+ </section>
81
+
82
+ <!-- Courses Section -->
83
+ <section id="courses" class="py-16 bg-white">
84
+ <div class="container mx-auto px-4 max-w-6xl">
85
+ <h2 class="text-3xl font-bold text-center mb-12">Explore Our <span class="text-primary">Courses</span></h2>
86
+ <div class="grid md:grid-cols-2 lg:grid-cols-3 gap-8">
87
+ {% for course_id, course in courses.items() %}
88
+ {% set notebooks = course.get('notebooks', []) %}
89
+ {% set notebook_count = notebooks|length %}
90
+
91
+ {% if notebook_count > 0 %}
92
+ {% set title = course.get('title', course_id|replace('_', ' ')|title) %}
93
+
94
+ <div class="bg-white border border-gray-200 rounded-lg overflow-hidden hover-grow card-shadow">
95
+ <div class="h-2 bg-primary"></div>
96
+ <div class="p-6">
97
+ <h3 class="text-xl font-semibold mb-2">{{ title }}</h3>
98
+ <p class="text-gray-600 mb-4">
99
+ {% if course.get('description_html') %}
100
+ {{ course.get('description_html')|safe }}
101
+ {% endif %}
102
+ </p>
103
+ <div class="mb-4">
104
+ <span class="text-sm text-gray-500 block mb-2">{{ notebook_count }} notebooks:</span>
105
+ <ol class="space-y-1 list-decimal pl-5">
106
+ {% for notebook in notebooks %}
107
+ {% set notebook_title = notebook.get('title', notebook.get('path', '').split('/')[-1].replace('.py', '').replace('_', ' ').title()) %}
108
+ <li>
109
+ <a href="{{ notebook.get('html_path', '#') }}" class="text-primary hover:text-dark-green text-sm flex items-center">
110
+ {{ notebook_title }}
111
+ </a>
112
+ </li>
113
+ {% endfor %}
114
+ </ol>
115
+ </div>
116
+ </div>
117
+ </div>
118
+ {% endif %}
119
+ {% endfor %}
120
+ </div>
121
+ </div>
122
+ </section>
123
+
124
+ <!-- Contribute Section -->
125
+ <section class="py-16 bg-light">
126
+ <div class="container mx-auto px-4 max-w-6xl text-center">
127
+ <h2 class="text-3xl font-bold mb-6">Want to Contribute?</h2>
128
+ <p class="text-lg text-gray-700 mb-8 max-w-2xl mx-auto">Help us improve these learning materials by contributing to the GitHub repository. We welcome new content, bug fixes, and improvements!</p>
129
+ <a href="https://github.com/marimo-team/learn" target="_blank" class="bg-primary hover:bg-dark-green text-white font-medium py-3 px-8 rounded-md transition duration-300 inline-flex items-center">
130
+ <svg class="w-5 h-5 mr-2" fill="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
131
+ <path fill-rule="evenodd" d="M12 0C5.37 0 0 5.37 0 12c0 5.31 3.435 9.795 8.205 11.385.6.105.825-.255.825-.57 0-.285-.015-1.23-.015-2.235-3.015.555-3.795-.735-4.035-1.41-.135-.345-.72-1.41-1.23-1.695-.42-.225-1.02-.78-.015-.795.945-.015 1.62.87 1.845 1.23 1.08 1.815 2.805 1.305 3.495.99.105-.78.42-1.305.765-1.605-2.67-.3-5.46-1.335-5.46-5.925 0-1.305.465-2.385 1.23-3.225-.12-.3-.54-1.53.12-3.18 0 0 1.005-.315 3.3 1.23.96-.27 1.98-.405 3-.405s2.04.135 3 .405c2.295-1.56 3.3-1.23 3.3-1.23.66 1.65.24 2.88.12 3.18.765.84 1.23 1.905 1.23 3.225 0 4.605-2.805 5.625-5.475 5.925.435.375.81 1.095.81 2.22 0 1.605-.015 2.895-.015 3.3 0 .315.225.69.825.57A12.02 12.02 0 0024 12c0-6.63-5.37-12-12-12z" clip-rule="evenodd"></path>
132
+ </svg>
133
+ Contribute on GitHub
134
+ </a>
135
+ </div>
136
+ </section>
137
+
138
+ <!-- Footer -->
139
+ <footer class="bg-gray-800 text-white py-8">
140
+ <div class="container mx-auto px-4 max-w-6xl">
141
+ <div class="flex flex-col md:flex-row justify-between items-center">
142
+ <div class="mb-4 md:mb-0">
143
+ <p>&copy; {{ current_year }} marimo. All rights reserved.</p>
144
+ </div>
145
+ <div class="flex space-x-4">
146
+ <a href="https://github.com/marimo-team/learn" target="_blank" class="text-gray-300 hover:text-white transition duration-300">
147
+ <svg class="w-6 h-6" fill="currentColor" viewBox="0 0 24 24" xmlns="http://www.w3.org/2000/svg">
148
+ <path fill-rule="evenodd" d="M12 0C5.37 0 0 5.37 0 12c0 5.31 3.435 9.795 8.205 11.385.6.105.825-.255.825-.57 0-.285-.015-1.23-.015-2.235-3.015.555-3.795-.735-4.035-1.41-.135-.345-.72-1.41-1.23-1.695-.42-.225-1.02-.78-.015-.795.945-.015 1.62.87 1.845 1.23 1.08 1.815 2.805 1.305 3.495.99.105-.78.42-1.305.765-1.605-2.67-.3-5.46-1.335-5.46-5.925 0-1.305.465-2.385 1.23-3.225-.12-.3-.54-1.53.12-3.18 0 0 1.005-.315 3.3 1.23.96-.27 1.98-.405 3-.405s2.04.135 3 .405c2.295-1.56 3.3-1.23 3.3-1.23.66 1.65.24 2.88.12 3.18.765.84 1.23 1.905 1.23 3.225 0 4.605-2.805 5.625-5.475 5.925.435.375.81 1.095.81 2.22 0 1.605-.015 2.895-.015 3.3 0 .315.225.69.825.57A12.02 12.02 0 0024 12c0-6.63-5.37-12-12-12z" clip-rule="evenodd"></path>
149
+ </svg>
150
+ </a>
151
+ <a href="https://marimo.io" target="_blank" class="text-gray-300 hover:text-white transition duration-300">
152
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
153
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M21 12a9 9 0 01-9 9m9-9a9 9 0 00-9-9m9 9H3m9 9a9 9 0 01-9-9m9 9c1.657 0 3-4.03 3-9s-1.343-9-3-9m0 18c-1.657 0-3-4.03-3-9s1.343-9 3-9m-9 9a9 9 0 019-9" />
154
+ </svg>
155
+ </a>
156
+ </div>
157
+ </div>
158
+ </div>
159
+ </footer>
160
+
161
+ <!-- Scripts -->
162
+ <script>
163
+ // Smooth scrolling for anchor links
164
+ document.querySelectorAll('a[href^="#"]').forEach(anchor => {
165
+ anchor.addEventListener('click', function (e) {
166
+ e.preventDefault();
167
+ document.querySelector(this.getAttribute('href')).scrollIntoView({
168
+ behavior: 'smooth'
169
+ });
170
+ });
171
+ });
172
+ </script>
173
+ </body>
174
+ </html>
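The template consumes the `courses` dictionary assembled by `scripts/build.py` together with `current_year`. A minimal sketch of rendering it outside the build pipeline, where the course data is an illustrative placeholder shaped like the output of `organize_notebooks_by_course`:

```python
import datetime
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader("scripts/templates"))
template = env.get_template("index.html")

# Placeholder data shaped like the structure built in scripts/build.py
courses = {
    "python": {
        "id": "python",
        "title": "Python",
        "description": "Python fundamentals.",
        "description_html": "<p>Python fundamentals.</p>",
        "notebooks": [
            {
                "path": "python/006_dictionaries.py",
                "html_path": "python/006_dictionaries.html",
                "title": "Dictionaries",
                "display_name": "Dictionaries",
                "original_number": "006",
            }
        ],
    }
}

html = template.render(courses=courses, current_year=datetime.date.today().year)
print(html[:200])
```

Courses with an empty `notebooks` list are skipped by the `{% if notebook_count > 0 %}` guard, so only successfully exported notebooks appear on the landing page.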