Pandas: Create a DataFrame with MultiIndex for columns

31 July 2020

Code

Option # 1: Using .from_product()

# Import libraries
import pandas as pd
import numpy as np 

# Build the column MultiIndex as the cartesian product of the two level lists:
# every year is paired with every season, in order
years = [2020, 2021]
season = ['summer', 'winter']
levels = [years, season]
index = pd.MultiIndex.from_product(levels, names=['year', 'season'])

# Fill a 3-row frame with random integers in [20, 40); the 4 columns line up
# with the 2 x 2 (year, season) pairs of the MultiIndex
values = np.random.randint(low=20, high=40, size=(3, 4))
df = pd.DataFrame(values, columns=index)

# Show the frame and its two-level column index
print(df)
print(df.columns)

Output

year     2020          2021       
season summer winter summer winter
0          33     24     33     25
1          29     26     34     39
2          23     28     38     34
MultiIndex([(2020, 'summer'),
            (2020, 'winter'),
            (2021, 'summer'),
            (2021, 'winter')],
           names=['year', 'season'])




Option # 2: Using .from_arrays()

# Import libraries
import pandas as pd
import numpy as np

# Build the column MultiIndex from two parallel arrays: the i-th year and the
# i-th season together form the i-th (year, season) column label
years = [2020, 2020, 2021, 2021]
season = ['summer', 'winter', 'summer', 'winter']
arrays = [years, season]
index = pd.MultiIndex.from_arrays(arrays, names=['year', 'season'])

# Fill a 3-row frame with random integers in [20, 40); the 4 columns line up
# with the 4 entries of the parallel arrays
values = np.random.randint(low=20, high=40, size=(3, 4))
df = pd.DataFrame(values, columns=index)

# Show the frame and its two-level column index
print(df)
print(df.columns)

Output

year     2020          2021       
season summer winter summer winter
0          32     33     25     29
1          32     34     24     36
2          23     36     25     34
MultiIndex([(2020, 'summer'),
            (2020, 'winter'),
            (2021, 'summer'),
            (2021, 'winter')],
           names=['year', 'season'])






Found any errors in the code above?
Please send a message.