AnscombeQuartet
Anscombe's Quartet Analysis¶
In [ ]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import io
import requests
In [7]:
# Read the Anscombe's quartet CSV from its relative path into a DataFrame
url = "data/anscombe.csv"
data = pd.read_csv(filepath_or_buffer=url)
# Preview the first five rows as the cell's rich output
data.head(5)
Out[7]:
dataset | x | y | |
---|---|---|---|
0 | I | 10.0 | 8.04 |
1 | I | 8.0 | 6.95 |
2 | I | 13.0 | 7.58 |
3 | I | 9.0 | 8.81 |
4 | I | 11.0 | 8.33 |
In [4]:
# Create a function to plot Anscombe's quartet with a dark theme
def plot_anscombe(data):
    """Plot the four Anscombe datasets as a 2x2 grid of scatter plots.

    Each panel shows one dataset's points together with a degree-1
    least-squares fit line computed by ``np.polyfit``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'dataset' column holding the labels 'I', 'II', 'III'
        and 'IV', plus the 'x' and 'y' columns to plot.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Apply the dark theme only while this figure is built and shown.
    # plt.style.use() would change the global rcParams and silently restyle
    # every later figure created in the same kernel.
    with plt.style.context('dark_background'):
        # Set up the figure and axes
        fig, axs = plt.subplots(2, 2, figsize=(12, 10))
        fig.suptitle("Anscombe's Quartet", fontsize=16, color='white')

        # axs.flat walks the grid row by row: top-left, top-right,
        # bottom-left, bottom-right.
        for ax, dataset in zip(axs.flat, ['I', 'II', 'III', 'IV']):
            subset = data[data['dataset'] == dataset]
            x = subset['x']
            y = subset['y']

            # Scatter the raw points, then overlay the fitted straight line
            ax.scatter(x, y, color='cyan')
            trend = np.poly1d(np.polyfit(x, y, 1))
            ax.plot(x, trend(x), color='orange')

            ax.set_title(f"Dataset {dataset}", color='white')
            ax.set_xlabel("x", color='white')
            ax.set_ylabel("y", color='white')
            ax.tick_params(colors='white')  # Change tick color to white

        # Keep the top 4% of the figure free so the suptitle is not overlapped
        plt.tight_layout(rect=[0, 0, 1, 0.96])
        plt.show()
This Jupyter Notebook is designed to demonstrate the capabilities of JupyterLite in the context of data science, specifically using popular libraries such as NumPy, pandas, and Matplotlib. The focus of the analysis is on Anscombe's quartet, a well-known collection of four small datasets that have nearly identical summary statistics yet look very different when plotted, which illustrates the importance of data visualization in statistical analysis.
In [ ]:
# Render the 2x2 quartet figure from the DataFrame loaded above
plot_anscombe(data=data)
In [ ]: