from collections import Counter
import statistics as st

DataSet = [
    13, 15, 16, 16, 19, 20, 20, 21, 22, 22, 25, 25, 25, 25,
    30, 33, 33, 35, 35, 35, 36, 40, 45, 46, 52, 70,
]

# Total of every observation, using the built-in sum().
print(f"Sum of all items of Data Set : {sum(DataSet)}")

# Frequency of each distinct value. Counter is a dict subclass: every distinct
# element becomes a key and its number of occurrences becomes the value.
print("Count of each items in Data Set : ", Counter(DataSet), sep="\n")

# ---- Measures provided by the standard-library `statistics` module ----

# Mean: sum of all data items divided by the number of data items.
print("Mean of Data Set : ", st.mean(DataSet), sep="\n")

# Median: the middle value of the sorted data; when the number of items is
# even (as it is here) it is the average of the two middle values.
print("Median of Data Set : ", st.median(DataSet), sep="\n")

# Mode: the item that appears most often.
print("Mode of Data Set : ", st.mode(DataSet), sep="\n")

# Mid-range: the average of the largest and the smallest item.
print(
    "Mid Range Value Of Data Set : ",
    st.mean([max(DataSet), min(DataSet)]),
    sep="\n",
)

# ---- Other useful statistical measures ----

# n=4 asks for quartiles, i.e. three cut points: [20.0, 25.0, 35.25]
print("Quantiles Of Data Set : ", st.quantiles(DataSet, n=4), sep="\n")

# stdev() / variance() are the *sample* standard deviation and variance.
print("Std. Deviation Of Data Set : ", st.stdev(DataSet), sep="\n")
print("Variance Of Data Set : ", st.variance(DataSet), sep="\n")

Sum of all items of Data Set : 774
Count of each items in Data Set : 
Counter({25: 4, 35: 3, 16: 2, 20: 2, 22: 2, 33: 2, 13: 1, 15: 1, 19: 1, 21: 1, 30: 1, 36: 1, 40: 1, 45: 1, 46: 1, 52: 1, 70: 1})
Mean of Data Set : 
29.76923076923077
Median of Data Set : 
25.0
Mode of Data Set : 
25
Mid Range Value Of Data Set : 
41.5
Quantiles Of Data Set : 
[20.0, 25.0, 35.25]
Std. Deviation Of Data Set : 
13.158442741624686
Variance Of Data Set : 
173.14461538461538


                        
                            import pandas as pd


                        
                            # Load the height/weight data; the relative path means HeightWeight.csv
                            # is looked up in the current working directory.
                            df = pd.read_csv('HeightWeight.csv')


                        
                            # Preview the first rows of the frame (notebook cell display).
                            df.head()


                        
                            # Pearson's correlation coefficient between the height and weight columns,
                            # computed with the standard-library statistics module (imported as `st`).
                            st.correlation(df['Height(Inches)'], df['Weight(Pounds)'])

0.5028585206028441


                        
                            # statistics.linear_regression() is new in Python 3.10.
                            # Height is passed first (x) and Weight second (y), so the fit is
                            # Weight = slope * Height + intercept.
    slope, intercept = st.linear_regression(df['Height(Inches)'], df['Weight(Pounds)'])


                        
                            # Display the fitted coefficients (notebook cell display).
                            slope, intercept

(3.0834764454029657, -82.57574306454092)


                        
                            !pip show statsmodels

Name: statsmodels
Version: 0.13.5
Summary: Statistical computations and models for Python
Home-page: https://www.statsmodels.org/
Author: 
Author-email: 
License: BSD License
Location: /home/ashish/anaconda3/envs/py310/lib/python3.10/site-packages
Requires: numpy, packaging, pandas, patsy, scipy
Required-by:


                        
                            # Calculating various statistical values for a data set using statsmodels, SciPy, NumPy and pandas functions
# importing required modules 

from statsmodels import stats
import statsmodels.stats.weightstats as ws
import statsmodels.stats.descriptivestats as ds
import statsmodels.stats.libqsturng as lq


                        
# Import SciPy's stats module directly instead of reaching it through
# `statsmodels.stats.weightstats.stats`, which only exists because weightstats
# happens to import scipy.stats internally.
from scipy import stats as sp_stats

DataSet = [13, 15, 16, 16, 19, 20, 20, 21, 22, 22, 25, 25, 25, 25, 30, 33, 33, 35, 35, 35, 36, 40, 45, 46, 52, 70]

# Geometric mean of the data set (not the arithmetic mean, hence the name).
geo_mean = sp_stats.gmean(DataSet)
print(geo_mean)

# Note: scipy.stats has no `median` function (calling it raises
# AttributeError); the median is available in the describe() table below.

# statsmodels' describe() returns a table of descriptive statistics
# (mean, std, quantiles, skew, kurtosis, ...) for the data.
desc_stats = ds.describe(DataSet)
print("desc_stats using statsmodels : ", desc_stats)

27.347117200207276
desc_stats using statsmodels :                            0
nobs              26.000000
missing            0.000000
mean              29.769231
std_err            2.580583
upper_ci          34.827080
lower_ci          24.711381
std               13.158443
iqr               14.750000
iqr_normal        10.934191
mad               10.213018
mad_normal        12.800120
coef_var           0.442015
range             57.000000
max               70.000000
min               13.000000
skew               1.206785
kurtosis           4.506284
jarque_bera        8.768727
jarque_bera_pval   0.012471
mode              25.000000
mode_freq          0.153846
median            25.000000
1%                13.500000
5%                15.250000
10%               16.000000
25%               20.250000
50%               25.000000
75%               35.000000
90%               45.500000
95%               50.500000
99%               65.500000


                        
                            type(desc_stats) # ds.describe() returned a pandas DataFrame: pandas.core.frame.DataFrame

pandas.core.frame.DataFrame


                        
                            # Print a handful of headline statistics from the describe() table.
                            # Each row holds one statistic; the table has a single value column,
                            # so the first (only) entry of the row is taken by position.
                            for stat_name in ('mean', 'median', 'mode', 'std', '25%', '50%', '75%',
                                              'iqr', 'min', 'max'):
                                print(stat_name, desc_stats.loc[stat_name].iloc[0])

mean 29.76923076923077
median 25.0
mode 25.0
std 13.158442741624686
25% 20.25
50% 25.0
75% 35.0
iqr 14.75
min 13.0
max 70.0


                        
import pandas as pd

# Series.describe() returns the whole summary (count, mean, std, min,
# quartiles, max), not just the mean; the mean is one row of the result.
pandas_summary = pd.Series(DataSet).describe()
print("Mean using pandas :", pandas_summary)

Mean using pandas : count    26.000000
mean     29.769231
std      13.158443
min      13.000000
25%      20.250000
50%      25.000000
75%      35.000000
max      70.000000
dtype: float64


                        
                            import statsmodels.api as sm
    import pandas as pd


                        
                            # Load the height/weight data; HeightWeight.csv is looked up in the
                            # current working directory.
                            df = pd.read_csv('HeightWeight.csv')


                        
                            # NOTE(review): sm.OLS takes (endog, exog), so Height is the dependent
                            # variable here and Weight the only regressor, with no constant column.
                            # The model is therefore Height = coef * Weight through the origin, which
                            # is why the summary reports "R-squared (uncentered)" and a single
                            # coefficient. This is not the same model as the earlier
                            # st.linear_regression(Height, Weight) fit (Weight on Height with an
                            # intercept); to reproduce that one use
                            # sm.OLS(df['Weight(Pounds)'], sm.add_constant(df['Height(Inches)'])).
                            results = sm.OLS(df['Height(Inches)'], df['Weight(Pounds)']).fit()


                        
                            # Display the fitted model's summary tables (notebook cell display).
                            results.summary()

	Index	Height(Inches)	Weight(Pounds)
0	1	65.78331	112.9925
1	2	71.51521	136.4873
2	3	69.39874	153.0269
3	4	68.21660	142.3354
4	5	67.78781	144.2971

Dep. Variable:	Height(Inches)	R-squared (uncentered):	0.993
Model:	OLS	Adj. R-squared (uncentered):	0.993
Method:	Least Squares	F-statistic:	3.783e+06
Date:	Mon, 15 May 2023	Prob (F-statistic):	0.00
Time:	13:09:01	Log-Likelihood:	-78144.
No. Observations:	25000	AIC:	1.563e+05
Df Residuals:	24999	BIC:	1.563e+05
Df Model:	1
Covariance Type:	nonrobust

	coef	std err	t	P>\|t\|	[0.025	0.975]
Weight(Pounds)	0.5313	0.000	1944.918	0.000	0.531	0.532

Omnibus:	3.114	Durbin-Watson:	1.974
Prob(Omnibus):	0.211	Jarque-Bera (JB):	3.091
Skew:	0.024	Prob(JB):	0.213
Kurtosis:	3.025	Cond. No.	1.00

Method	Description
math.acos()	Returns the arc cosine of a number
math.acosh()	Returns the inverse hyperbolic cosine of a number
math.asin()	Returns the arc sine of a number
math.asinh()	Returns the inverse hyperbolic sine of a number
math.atan()	Returns the arc tangent of a number in radians
math.atan2()	Returns the arc tangent of y/x in radians
math.atanh()	Returns the inverse hyperbolic tangent of a number
math.ceil()	Rounds a number up to the nearest integer
math.comb()	Returns the number of ways to choose k items from n items without repetition and order
math.copysign()	Returns a float consisting of the value of the first parameter and the sign of the second parameter
math.cos()	Returns the cosine of a number
math.cosh()	Returns the hyperbolic cosine of a number
math.degrees()	Converts an angle from radians to degrees
math.dist()	Returns the Euclidean distance between two points (p and q), where p and q are the coordinates of those points
math.erf()	Returns the error function of a number
math.erfc()	Returns the complementary error function of a number
math.exp()	Returns E raised to the power of x
math.expm1()	Returns E^x - 1
math.fabs()	Returns the absolute value of a number
math.factorial()	Returns the factorial of a number
math.floor()	Rounds a number down to the nearest integer
math.fmod()	Returns the remainder of x/y
math.frexp()	Returns the mantissa and the exponent, of a specified number
math.fsum()	Returns the sum of all items in any iterable (tuples, arrays, lists, etc.)
math.gamma()	Returns the gamma function at x
math.gcd()	Returns the greatest common divisor of two integers
math.hypot()	Returns the Euclidean norm
math.isclose()	Checks whether two values are close to each other, or not
math.isfinite()	Checks whether a number is finite or not
math.isinf()	Checks whether a number is infinite or not
math.isnan()	Checks whether a value is NaN (not a number) or not
math.isqrt()	Rounds a square root number downwards to the nearest integer
math.ldexp()	Returns the inverse of math.frexp() which is x * (2**i) of the given numbers x and i
math.lgamma()	Returns the log gamma value of x
math.log()	Returns the natural logarithm of a number, or the logarithm of the number to a given base
math.log10()	Returns the base-10 logarithm of x
math.log1p()	Returns the natural logarithm of 1+x
math.log2()	Returns the base-2 logarithm of x
math.perm()	Returns the number of ways to choose k items from n items with order and without repetition
math.pow()	Returns the value of x to the power of y
math.prod()	Returns the product of all the elements in an iterable
math.radians()	Converts a degree value into radians
math.remainder()	Returns the closest value that can make numerator completely divisible by the denominator
math.sin()	Returns the sine of a number
math.sinh()	Returns the hyperbolic sine of a number
math.sqrt()	Returns the square root of a number
math.tan()	Returns the tangent of a number
math.tanh()	Returns the hyperbolic tangent of a number
math.trunc()	Returns the truncated integer parts of a number

math-eight

Pages

Monday, 15 May 2023

Descriptive Statistics and Linear Regression Using 'statistics' module and 'statsmodels' module

Using 'statistics' module

Linear Regression¶

Using 'statsmodels' module

Linear Regression¶

Friday, 12 May 2023

Python 'math' Module, 'statistics' Module and Descriptive statistics using Pandas, NumPy, SciPy and StatsModels

Python math Module

Math Methods

Math Constants

Python statistics Module

Averages and measures of central location

Measures of spread

Statistics for relations between two inputs

NormalDist

Thursday, 11 May 2023

Index of Lessons in Technology

Artificial Intelligence

Data Mining

Distinguishing Between Artificial Intelligence and Data Science Using Images

Artificial Intelligence & Data Science

AI, ML and Deep Learning in a Euler Diagram

A broad view of Data Science

Data Science Venn Diagram

Machine Learning

Data Science

Data Scientist and Analyst uses –

Sunday, 9 April 2023

Animation for Single Digit Subtraction

Note: We will subtract the smaller number from the larger number.

Select first number:

Select second number:

Reading App

Constant	Description
math.e	Returns Euler's number (2.7182...)
math.inf	Returns a floating-point positive infinity
math.nan	Returns a floating-point NaN (Not a Number) value
math.pi	Returns PI (3.1415...)
math.tau	Returns tau (6.2831...)

`mean()`	Arithmetic mean (“average”) of data.
`fmean()`	Fast, floating point arithmetic mean, with optional weighting.
`geometric_mean()`	Geometric mean of data.
`harmonic_mean()`	Harmonic mean of data.
`median()`	Median (middle value) of data.
`median_low()`	Low median of data.
`median_high()`	High median of data.
`median_grouped()`	Median, or 50th percentile, of grouped data.
`mode()`	Single mode (most common value) of discrete or nominal data.
`multimode()`	List of modes (most common values) of discrete or nominal data.
`quantiles()`	Divide data into intervals with equal probability.

`pstdev()`	Population standard deviation of data.
`pvariance()`	Population variance of data.
`stdev()`	Sample standard deviation of data.
`variance()`	Sample variance of data.

`covariance()`	Sample covariance for two variables.
`correlation()`	Pearson's correlation coefficient for two variables.
`linear_regression()`	Slope and intercept for simple linear regression.