Generating Normally Distributed Data
Use the `np.random.normal` method to generate normally distributed samples.
import numpy as np
# Distribution parameters: mean=0.98, standard deviation=4, 10000 samples.
mean = 0.98  # Mean (passed as loc)
std_dev = 4  # Standard deviation (passed as scale)
n_samples = 10000  # Number of samples to draw
# loc and scale must be passed explicitly: np.random.normal defaults to
# loc=0, scale=1 (the standard normal), not the parameters stated above.
data = np.random.normal(loc=mean, scale=std_dev, size=n_samples)
print(data[:50])  # Print the first 50 data points
Probability Density Function (PDF)
Use the `scipy.stats` module's `norm.pdf` to compute the probability density function.
from scipy.stats import norm
import numpy as np
# Density parameters: the PDF evaluated below belongs to a normal
# distribution with mean 0.98 and standard deviation 4.
mean, std_dev = 0.98, 4
# Evaluation points: 10000 draws from the standard normal (loc=0, scale=1).
# Note: these points are NOT drawn from the (mean, std_dev) distribution.
data = np.random.normal(0, 1, 10000)
# Freeze the N(mean, std_dev) distribution, then evaluate its density
# at every sample point.
target_dist = norm(loc=mean, scale=std_dev)
pdf = target_dist.pdf(data)
print(pdf)  # Probability density at each data point
[0.09838715 0.09937757 0.09967584 ... 0.09653173 0.07867429 0.09311813]
PDF Plot
from scipy.stats import norm
import numpy as np
import matplotlib.pyplot as plt
# Distribution parameters: mean=0.98, standard deviation=4; 10000 samples are drawn.
mean, std_dev = 0.98, 4
data = np.random.normal(mean, std_dev, 10000)

# Theoretical density, evaluated on an even 10000-point grid that spans
# the observed sample range.
x = np.linspace(data.min(), data.max(), 10000)
pdf = norm(loc=mean, scale=std_dev).pdf(x)

# Draw the normalised histogram of the samples first, then the PDF curve on top.
plt.hist(data, bins=30, density=True, alpha=0.6, color='b', label='Histogram')
plt.plot(x, pdf, 'k', linewidth=2, label='PDF (Normal)')

# Title, axis labels and legend
plt.title('Normal Distribution')
plt.xlabel('Value')
plt.ylabel('Density')
plt.legend()

# Show the figure
plt.show()
`'k'` specifies a black curve. More color codes are available, as shown in the table below:
| Code | Color | Description |
|---|---|---|
| `'b'` | Blue | blue |
| `'r'` | Red | red |
| `'g'` | Green | green |
| `'c'` | Cyan | cyan |
| `'m'` | Magenta | magenta |
| `'y'` | Yellow | yellow |
| `'k'` | Black | black |
| `'w'` | White | white |
Cumulative Distribution
`norm.cdf` calculates the cumulative distribution function for the normal distribution.
from scipy.stats import norm
import numpy as np
import matplotlib.pyplot as plt
# Distribution parameters: mean=0.98, standard deviation=4; 10000 samples are drawn.
mean = 0.98  # Mean
std_dev = 4  # Standard deviation
data = np.random.normal(loc=mean, scale=std_dev, size=10000)
# Plot the cumulative, normalised histogram (empirical CDF of the samples).
# cumulative=True puts the bars on the same 0-to-1 scale as the CDF curve;
# a plain density histogram peaks near 0.1 for these parameters and cannot
# be compared with a CDF on the same axis.
plt.hist(data, bins=30, density=True, cumulative=True, alpha=0.6, color='b', label='Histogram')
# Plot cumulative distribution function (CDF)
x = np.linspace(data.min(), data.max(), 10000)  # Even grid over the sample range
cdf = norm.cdf(x, loc=mean, scale=std_dev)  # Theoretical CDF on that grid
plt.plot(x, cdf, 'k', linewidth=2, label='CDF (Normal)')
# Add title, axis labels and legend
plt.title('Normal Distribution')
plt.xlabel('Value')
plt.ylabel('Cumulative probability')  # The y-axis is a probability, not a density
plt.legend()
# Display the plot
plt.show()
Normal Distribution Percent Point Function
`norm.ppf` calculates the quantile (the inverse of the CDF) for a given probability.
from scipy.stats import norm
import numpy as np
# Quantiles of the standard normal (loc=0, scale=1) at two probabilities.
# ppf is the inverse of the CDF: it returns the value below which the
# given fraction of the distribution lies.
lower_prob, upper_prob = 0.018, 0.819
q1, q2 = (norm.ppf(p, loc=0, scale=1) for p in (lower_prob, upper_prob))
print(f"0.018 quantile: {q1}, 0.819 quantile: {q2}")
PS E:\learn\learnpy> & "D:/Program Files/Python311/python.exe" e:/learn/learnpy/learn.py
0.018 quantile: -2.0969274291643423, 0.819 quantile: 0.9115607350675405
PS E:\learn\learnpy>
Normal Distribution Fitting
Use `scipy.stats`'s `norm.fit` for normal distribution fitting; it estimates the mean and standard deviation from the data.
from scipy.stats import norm
import numpy as np
# Draw 10000 samples from a normal distribution with known parameters
# (mean 3, standard deviation 1.67) so the fitted values can be compared
# against them.
true_mean, true_std, sample_count = 3, 1.67, 10000
data = np.random.normal(true_mean, true_std, sample_count)
# norm.fit returns the estimated (mean, standard deviation) of the sample
fitted_params = norm.fit(data)
mu, sigma = fitted_params
print(f"Fitted mean: {mu}, Fitted standard deviation: {sigma}")
PS E:\learn\learnpy> & "D:/Program Files/Python311/python.exe" e:/learn/learnpy/learn.py
Fitted mean: 3.006577810135438, Fitted standard deviation: 1.672727044555993
Top comments (0)