travailler les co-occurrences avec pandas

les ingrédients

import pandas as pd
from sklearn import manifold
from sklearn.preprocessing import MultiLabelBinarizer
import numpy as np
# Load the Pokémon table from the CSV file hosted at this gist URL.
POKEMON_CSV_URL = 'https://gist.githubusercontent.com/armgilles/194bcff35001e7eb53a2a8b441e8b2c6/raw/92200bc0a673d5ce2110aaad4544ed6c4010f687/pokemon.csv'
pokemons = pd.read_csv(POKEMON_CSV_URL)

# Preview the table keyed by name. The result is not assigned, so `pokemons`
# itself keeps the index it was loaded with.
pokemons.set_index(keys='Name')
# Type 1 Type 2 Total HP Attack Defense Sp. Atk Sp. Def Speed Generation Legendary
Name
Bulbasaur 1 Grass Poison 318 45 49 49 65 65 45 1 False
Ivysaur 2 Grass Poison 405 60 62 63 80 80 60 1 False
Venusaur 3 Grass Poison 525 80 82 83 100 100 80 1 False
VenusaurMega Venusaur 3 Grass Poison 625 80 100 123 122 120 80 1 False
Charmander 4 Fire NaN 309 39 52 43 60 50 65 1 False
... ... ... ... ... ... ... ... ... ... ... ... ...
Diancie 719 Rock Fairy 600 50 100 150 100 150 50 6 True
DiancieMega Diancie 719 Rock Fairy 700 50 160 110 160 110 110 6 True
HoopaHoopa Confined 720 Psychic Ghost 600 80 110 60 150 130 70 6 True
HoopaHoopa Unbound 720 Psychic Dark 680 80 160 60 170 130 80 6 True
Volcanion 721 Fire Water 600 80 110 120 130 90 70 6 True

800 rows × 12 columns

transformer des colonnes de listes en une liste de colonnes

# Encoder that turns each collection of labels into a row of 0/1 indicators,
# one column per distinct label.
mlb = MultiLabelBinarizer()

# One indicator row per Pokémon, indexed by name. Missing values are replaced
# by the placeholder 'No Type 2' before encoding, so that placeholder becomes
# an indicator column of its own.
pokemons_types = (
    pokemons
    .set_index('Name')
    .fillna('No Type 2')
    [['Type 1', 'Type 2']]
    .pipe(
        lambda pairs: pd.DataFrame(
            # `data` is evaluated before `columns`, so `mlb.classes_` is read
            # only after the encoder has been fitted.
            data=mlb.fit_transform(
                list(pairs.itertuples(index=False, name=None))
            ),
            index=pairs.index,
            columns=mlb.classes_,
        )
    )
)

pokemons_types
Bug Dark Dragon Electric Fairy Fighting Fire Flying Ghost Grass Ground Ice No Type 2 Normal Poison Psychic Rock Steel Water
Name
Bulbasaur 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
Ivysaur 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
Venusaur 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
VenusaurMega Venusaur 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0
Charmander 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 0 0 0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
Diancie 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0
DiancieMega Diancie 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0
HoopaHoopa Confined 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0
HoopaHoopa Unbound 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
Volcanion 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1

800 rows × 19 columns

visualiser la matrice de corrélation

# Pairwise correlation between the type indicator columns, rendered as signed
# percentages on a diverging colour map whose range is pinned to [-1, 1].
(
    pokemons_types
    .corr()
    .style
    .format(formatter='{:+,.1%}')
    .background_gradient(cmap='PiYG', vmin=-1, vmax=1)
)
  Bug Dark Dragon Electric Fairy Fighting Fire Flying Ghost Grass Ground Ice No Type 2 Normal Poison Psychic Rock Steel Water
Bug +100.0% -8.2% -8.1% -4.5% -7.2% -4.9% -6.1% +6.5% -5.9% -3.4% -6.4% -7.0% -15.5% -12.0% +12.1% -11.2% -0.4% +4.7% -12.4%
Dark -8.2% +100.0% -0.4% -6.7% -6.0% -0.8% -2.0% -2.2% +0.1% -4.8% -2.3% -1.0% -15.0% -10.0% -1.8% -4.4% -3.3% -2.4% -2.9%
Dragon -8.1% -0.4% +100.0% -2.4% -3.6% -6.9% -3.8% +2.6% -1.9% -7.9% +5.2% +1.5% -13.6% -9.9% -5.6% -2.7% -3.2% -4.4% -8.3%
Electric -4.5% -6.7% -2.4% +100.0% -3.6% -6.9% -5.7% -2.0% -4.2% -7.9% -5.9% -3.3% +3.0% -6.8% -7.5% -9.2% -7.2% -0.1% -6.9%
Fairy -7.2% -6.0% -3.6% -3.6% +100.0% -6.1% -6.8% -5.3% -5.7% -4.9% -6.9% -5.1% -4.9% -0.2% -6.6% +2.7% +0.2% +1.3% -6.8%
Fighting -4.9% -0.8% -6.9% -6.9% -6.1% +100.0% +5.1% -8.6% -6.6% -5.1% -8.1% -5.9% -5.6% -7.2% -4.0% +0.1% -5.5% -0.5% -7.4%
Fire -6.1% -2.0% -3.8% -5.7% -6.8% +5.1% +100.0% -2.9% -1.3% -10.8% -2.3% -6.6% -2.7% -8.5% -8.5% -6.1% -6.5% -5.6% -11.5%
Flying +6.5% -2.2% +2.6% -2.0% -5.3% -8.6% -2.9% +100.0% -6.2% -8.1% -6.1% -4.9% -35.2% +12.6% -6.8% -6.4% -4.8% -8.1% -9.2%
Ghost -5.9% +0.1% -1.9% -4.2% -5.7% -6.6% -1.3% -6.2% +100.0% +7.5% -3.6% -3.0% -13.1% -9.4% +0.9% -7.1% -6.9% +2.6% -7.7%
Grass -3.4% -4.8% -7.9% -7.9% -4.9% -5.1% -10.8% -8.1% +7.5% +100.0% -9.7% -2.7% -9.9% -11.7% +11.0% -9.4% -7.3% -6.2% -12.7%
Ground -6.4% -2.3% +5.2% -5.9% -6.9% -8.1% -2.3% -6.1% -3.6% -9.7% +100.0% -0.4% -17.5% -10.2% -5.4% -7.9% +7.2% -2.1% -0.7%
Ice -7.0% -1.0% +1.5% -3.3% -5.1% -5.9% -6.6% -4.9% -3.0% -2.7% -0.4% +100.0% -6.3% -8.5% -6.5% -4.2% -1.7% -5.7% +0.0%
No Type 2 -15.5% -15.0% -13.6% +3.0% -4.9% -5.6% -2.7% -35.2% -13.1% -9.9% -17.5% -6.3% +100.0% +8.8% -14.0% -4.3% -18.3% -19.4% -1.2%
Normal -12.0% -10.0% -9.9% -6.8% -0.2% -7.2% -8.5% +12.6% -9.4% -11.7% -10.2% -8.5% +8.8% +100.0% -11.1% -11.2% -10.7% -9.8% -15.5%
Poison +12.1% -1.8% -5.6% -7.5% -6.6% -4.0% -8.5% -6.8% +0.9% +11.0% -5.4% -6.5% -14.0% -11.1% +100.0% -10.3% -8.1% -7.4% -7.4%
Psychic -11.2% -4.4% -2.7% -9.2% +2.7% +0.1% -6.1% -6.4% -7.1% -9.4% -7.9% -4.2% -4.3% -11.2% -10.3% +100.0% -6.9% +2.5% -10.0%
Rock -0.4% -3.3% -3.2% -7.2% +0.2% -5.5% -6.5% -4.8% -6.9% -7.3% +7.2% -1.7% -18.3% -10.7% -8.1% -6.9% +100.0% +4.9% +1.1%
Steel +4.7% -2.4% -4.4% -0.1% +1.3% -0.5% -5.6% -8.1% +2.6% -6.2% -2.1% -5.7% -19.4% -9.8% -7.4% +2.5% +4.9% +100.0% -9.6%
Water -12.4% -2.9% -8.3% -6.9% -6.8% -7.4% -11.5% -9.2% -7.7% -12.7% -0.7% +0.0% -1.2% -15.5% -7.4% -10.0% +1.1% -9.6% +100.0%

compter les co-occurrences

# Co-occurrence counts between type indicators: cell (a, b) sums, over all
# rows, the product of indicator a and indicator b. With 0/1 indicators the
# diagonal is the number of rows carrying each label.
pokemons_types.T @ pokemons_types
Bug Dark Dragon Electric Fairy Fighting Fire Flying Ghost Grass Ground Ice No Type 2 Normal Poison Psychic Rock Steel Water
Bug 72 0 0 2 0 2 2 14 1 6 2 0 17 0 13 0 5 7 1
Dark 0 51 3 0 0 3 3 5 3 3 3 2 10 0 3 3 2 2 6
Dragon 0 3 50 2 1 0 2 8 2 1 7 3 11 0 1 4 2 1 2
Electric 2 0 2 50 1 0 1 5 1 1 1 1 27 2 0 0 0 3 3
Fairy 0 0 1 1 40 0 0 2 0 2 0 0 15 5 0 6 3 3 2
Fighting 2 3 0 0 0 53 7 1 0 3 0 0 20 2 2 6 1 3 3
Fire 2 3 2 1 0 7 64 6 3 0 4 0 28 2 0 3 1 1 1
Flying 14 5 8 5 2 1 6 101 2 5 4 2 2 24 3 6 4 1 7
Ghost 1 3 2 1 0 0 3 2 46 10 2 1 10 0 4 1 0 4 2
Grass 6 3 1 1 2 3 0 5 10 95 1 3 33 2 15 3 2 2 3
Ground 2 3 7 1 0 0 4 4 2 1 67 3 13 1 2 2 9 3 10
Ice 0 2 3 1 0 0 0 2 1 3 3 38 13 0 0 2 2 0 6
No Type 2 17 10 11 27 15 20 28 2 10 33 13 13 386 61 15 38 9 5 59
Normal 0 0 0 2 5 2 2 24 0 2 1 0 61 102 0 2 0 0 1
Poison 13 3 1 0 0 2 0 3 4 15 2 0 15 0 62 0 0 0 4
Psychic 0 3 4 0 6 6 3 6 1 3 2 2 38 2 0 90 2 7 5
Rock 5 2 2 0 3 1 1 4 0 2 9 2 9 0 0 2 58 6 10
Steel 7 2 1 3 3 3 1 1 4 2 3 0 5 0 0 7 6 49 1
Water 1 6 2 3 2 3 1 7 2 3 10 6 59 1 4 5 10 1 126

visualiser les ressemblances

# Embed the type indicator rows into two dimensions with t-SNE.
# `init='random'` makes the layout depend on the random generator, so the seed
# is fixed to get the same coordinates on every run of the notebook.
tsne = manifold.TSNE(learning_rate='auto', init='random', random_state=0)

# One (x, y) coordinate pair per Pokémon, indexed by name like the input.
pokemons_xy = pd.DataFrame(
    tsne.fit_transform(pokemons_types),
    columns=['x', 'y'],
    index=pokemons_types.index,
)

pokemons_xy
x y
Name
Bulbasaur -3.390579 -21.680622
Ivysaur -3.987710 -21.019449
Venusaur -3.987710 -21.019449
VenusaurMega Venusaur -3.390579 -21.680622
Charmander -1.660779 13.174160
... ... ...
Diancie 20.239725 17.253794
DiancieMega Diancie 20.239725 17.253794
HoopaHoopa Confined -25.158485 -52.960030
HoopaHoopa Unbound 26.827829 -9.227021
Volcanion 41.527817 -61.349205

800 rows × 2 columns

# Scatter plot of the embedding, one point per Pokémon, coloured by 'Type 1'.
# The colour codes are computed from the joined frame itself, so each colour
# belongs to the row being drawn instead of relying on `pokemons` and
# `pokemons_xy` happening to share the same row order.
(
    pokemons_xy
    .join(
        pokemons
        .set_index('Name')
        [['Type 1']]
    )
    .pipe(
        lambda df: df.plot.scatter(
            x='x',
            y='y',
            # Integer code of each 'Type 1' category, used to pick a colour.
            c=df['Type 1'].astype('category').cat.codes,
            cmap='tab20',
            s=5,
            figsize=(15, 15),
            legend=None,
        )
    )
)
<Axes: xlabel='x', ylabel='y'>