In [2]:
# Install necessary libraries if not already installed
!pip install pandas statsmodels linearmodels

import pandas as pd
import numpy as np
import statsmodels.api as sm
from linearmodels.panel import PooledOLS


Collecting linearmodels
  Downloading linearmodels-6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Collecting mypy-extensions>=0.4 (from linearmodels)
  Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB)
Collecting pyhdfe>=0.1 (from linearmodels)
  Downloading pyhdfe-0.2.0-py3-none-any.whl.metadata (4.0 kB)
Collecting formulaic>=1.0.0 (from linearmodels)
  Downloading formulaic-1.1.1-py3-none-any.whl.metadata (6.9 kB)
Collecting setuptools-scm<9.0.0,>=8.0.0 (from setuptools-scm[toml]<9.0.0,>=8.0.0->linearmodels)
  Downloading setuptools_scm-8.3.1-py3-none-any.whl.metadata (7.0 kB)
Collecting interface-meta>=1.2.0 (from formulaic>=1.0.0->linearmodels)
  Downloading interface_meta-1.3.0-py3-none-any.whl.metadata (6.7 kB)
Downloading linearmodels-6.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.7 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ

In [3]:
# Toy balanced panel: 3 individuals (id), each observed in 3 years (year).
# Education is fixed per individual; experience and wage change by year.
data = {
    'id':         [1, 1, 1, 2, 2, 2, 3, 3, 3],
    'year':       [1, 2, 3, 1, 2, 3, 1, 2, 3],
    'education':  [12, 12, 12, 16, 16, 16, 14, 14, 14],
    'experience': [1, 2, 3, 1, 2, 3, 1, 2, 3],
    'wage':       [30, 32, 35, 40, 42, 45, 35, 37, 40],
}

# Build the frame and index it by (entity, time) = (id, year) in one step.
df = pd.DataFrame(data).set_index(['id', 'year'])

print(df)


         education  experience  wage
id year                             
1  1            12           1    30
   2            12           2    32
   3            12           3    35
2  1            16           1    40
   2            16           2    42
   3            16           3    45
3  1            14           1    35
   2            14           2    37
   3            14           3    40


In [4]:
# Dependent variable: the wage column of the panel.
y = df['wage']

# Regressors: education and experience, plus the constant (intercept) column
# that add_constant attaches to the selected columns.
X = sm.add_constant(df[['education', 'experience']])


In [5]:
# Fit the Pooled OLS model of wage on the constant, education and experience.
# PooledOLS is already imported in the notebook's first cell, so it is not
# re-imported here.
pooled_ols = PooledOLS(y, X)

# NOTE(review): fit() is called with its defaults; the printed summary reports
# "Cov. Estimator: Unadjusted". Consider whether a robust or clustered
# covariance is more appropriate for real panel data.
pooled_ols_results = pooled_ols.fit()

print(pooled_ols_results.summary)


                          PooledOLS Estimation Summary                          
Dep. Variable:                   wage   R-squared:                        0.9973
Estimator:                  PooledOLS   R-squared (Between):              1.0000
No. Observations:                   9   R-squared (Within):               0.9868
Date:                Mon, Jun 30 2025   R-squared (Overall):              0.9973
Time:                        10:18:00   Log-likelihood                    0.2362
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      1125.0
Entities:                           3   P-value                           0.0000
Avg Obs:                       3.0000   Distribution:                     F(2,6)
Min Obs:                       3.0000                                           
Max Obs:                       3.0000   F-statistic (robust):             1125.0
                            

In [7]:
# Fit the model (PooledOLS is already imported in the notebook's first cell).
# The fit is repeated here so that `results` depends only on `y` and `X`.
pooled_ols = PooledOLS(y, X)
results = pooled_ols.fit()

# Access individual results.
# Each entry pairs the heading to print with the corresponding attribute of
# the fitted results; the loop prints them in this order.
report_sections = [
    ("\nCoefficient Estimates:", results.params),    # coefficient estimates
    ("\nStandard Errors:", results.std_errors),      # standard errors
    ("\nT-Statistics:", results.tstats),             # t-statistics
    ("\nP-Values:", results.pvalues),                # p-values
    ("\nR-squared:", results.rsquared),              # goodness-of-fit metric
]

for heading, values in report_sections:
    print(heading)
    print(values)



Coefficient Estimates:
const        -2.666667
education     2.500000
experience    2.500000
Name: parameter, dtype: float64

Standard Errors:
const         0.863348
education     0.058926
experience    0.117851
Name: std_error, dtype: float64

T-Statistics:
const         -3.088749
education     42.426407
experience    21.213203
Name: tstat, dtype: float64

P-Values:
const         2.142205e-02
education     1.147341e-08
experience    7.154246e-07
Name: pvalue, dtype: float64

R-squared:
0.9973404255319149
