travailler les co-occurrences avec pandas
les ingrédients
# Source of the Pokémon dataset: a CSV file pinned to a specific gist revision.
POKEMON_CSV_URL = 'https://gist.githubusercontent.com/armgilles/194bcff35001e7eb53a2a8b441e8b2c6/raw/92200bc0a673d5ce2110aaad4544ed6c4010f687/pokemon.csv'

# Load the raw table; `pokemons` keeps the default positional index.
pokemons = pd.read_csv(POKEMON_CSV_URL)

# Display-only expression: the re-indexed frame is not assigned, so
# `pokemons` itself is left unchanged by this line.
pokemons.set_index('Name')
# | Type 1 | Type 2 | Total | HP | Attack | Defense | Sp. Atk | Sp. Def | Speed | Generation | Legendary | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
Name | ||||||||||||
Bulbasaur | 1 | Grass | Poison | 318 | 45 | 49 | 49 | 65 | 65 | 45 | 1 | False |
Ivysaur | 2 | Grass | Poison | 405 | 60 | 62 | 63 | 80 | 80 | 60 | 1 | False |
Venusaur | 3 | Grass | Poison | 525 | 80 | 82 | 83 | 100 | 100 | 80 | 1 | False |
VenusaurMega Venusaur | 3 | Grass | Poison | 625 | 80 | 100 | 123 | 122 | 120 | 80 | 1 | False |
Charmander | 4 | Fire | NaN | 309 | 39 | 52 | 43 | 60 | 50 | 65 | 1 | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
Diancie | 719 | Rock | Fairy | 600 | 50 | 100 | 150 | 100 | 150 | 50 | 6 | True |
DiancieMega Diancie | 719 | Rock | Fairy | 700 | 50 | 160 | 110 | 160 | 110 | 110 | 6 | True |
HoopaHoopa Confined | 720 | Psychic | Ghost | 600 | 80 | 110 | 60 | 150 | 130 | 70 | 6 | True |
HoopaHoopa Unbound | 720 | Psychic | Dark | 680 | 80 | 160 | 60 | 170 | 130 | 80 | 6 | True |
Volcanion | 721 | Fire | Water | 600 | 80 | 110 | 120 | 130 | 90 | 70 | 6 | True |
800 rows × 12 columns
transformer une colonne de listes en une liste de colonnes
# Build a 0/1 indicator table: one row per Pokémon, one column per type label.
mlb = MultiLabelBinarizer()

# Index by name and replace missing values with an explicit placeholder label,
# so that every row yields a pair of string labels.
_named = pokemons.set_index('Name').fillna('No Type 2')

# One (Type 1, Type 2) tuple per row, in row order.
_type_pairs = list(zip(_named['Type 1'], _named['Type 2']))

# fit_transform runs first (it is the positional argument), so mlb.classes_
# is populated by the time it is read for the column labels.
pokemons_types = pd.DataFrame(
    mlb.fit_transform(_type_pairs),
    columns=mlb.classes_,
    index=_named.index,
)
pokemons_types
Bug | Dark | Dragon | Electric | Fairy | Fighting | Fire | Flying | Ghost | Grass | Ground | Ice | No Type 2 | Normal | Poison | Psychic | Rock | Steel | Water | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Name | |||||||||||||||||||
Bulbasaur | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
Ivysaur | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
Venusaur | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
VenusaurMega Venusaur | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
Charmander | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
Diancie | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
DiancieMega Diancie | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
HoopaHoopa Confined | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
HoopaHoopa Unbound | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
Volcanion | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
800 rows × 19 columns
visualiser la matrice de corrélation
# Pairwise correlation between the type indicator columns, rendered as signed
# percentages on a diverging colour map whose scale is pinned to [-1, 1].
pokemons_types.corr().style.format('{:+,.1%}').background_gradient(
    cmap='PiYG', vmin=-1, vmax=1
)
Bug | Dark | Dragon | Electric | Fairy | Fighting | Fire | Flying | Ghost | Grass | Ground | Ice | No Type 2 | Normal | Poison | Psychic | Rock | Steel | Water | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Bug | +100.0% | -8.2% | -8.1% | -4.5% | -7.2% | -4.9% | -6.1% | +6.5% | -5.9% | -3.4% | -6.4% | -7.0% | -15.5% | -12.0% | +12.1% | -11.2% | -0.4% | +4.7% | -12.4% |
Dark | -8.2% | +100.0% | -0.4% | -6.7% | -6.0% | -0.8% | -2.0% | -2.2% | +0.1% | -4.8% | -2.3% | -1.0% | -15.0% | -10.0% | -1.8% | -4.4% | -3.3% | -2.4% | -2.9% |
Dragon | -8.1% | -0.4% | +100.0% | -2.4% | -3.6% | -6.9% | -3.8% | +2.6% | -1.9% | -7.9% | +5.2% | +1.5% | -13.6% | -9.9% | -5.6% | -2.7% | -3.2% | -4.4% | -8.3% |
Electric | -4.5% | -6.7% | -2.4% | +100.0% | -3.6% | -6.9% | -5.7% | -2.0% | -4.2% | -7.9% | -5.9% | -3.3% | +3.0% | -6.8% | -7.5% | -9.2% | -7.2% | -0.1% | -6.9% |
Fairy | -7.2% | -6.0% | -3.6% | -3.6% | +100.0% | -6.1% | -6.8% | -5.3% | -5.7% | -4.9% | -6.9% | -5.1% | -4.9% | -0.2% | -6.6% | +2.7% | +0.2% | +1.3% | -6.8% |
Fighting | -4.9% | -0.8% | -6.9% | -6.9% | -6.1% | +100.0% | +5.1% | -8.6% | -6.6% | -5.1% | -8.1% | -5.9% | -5.6% | -7.2% | -4.0% | +0.1% | -5.5% | -0.5% | -7.4% |
Fire | -6.1% | -2.0% | -3.8% | -5.7% | -6.8% | +5.1% | +100.0% | -2.9% | -1.3% | -10.8% | -2.3% | -6.6% | -2.7% | -8.5% | -8.5% | -6.1% | -6.5% | -5.6% | -11.5% |
Flying | +6.5% | -2.2% | +2.6% | -2.0% | -5.3% | -8.6% | -2.9% | +100.0% | -6.2% | -8.1% | -6.1% | -4.9% | -35.2% | +12.6% | -6.8% | -6.4% | -4.8% | -8.1% | -9.2% |
Ghost | -5.9% | +0.1% | -1.9% | -4.2% | -5.7% | -6.6% | -1.3% | -6.2% | +100.0% | +7.5% | -3.6% | -3.0% | -13.1% | -9.4% | +0.9% | -7.1% | -6.9% | +2.6% | -7.7% |
Grass | -3.4% | -4.8% | -7.9% | -7.9% | -4.9% | -5.1% | -10.8% | -8.1% | +7.5% | +100.0% | -9.7% | -2.7% | -9.9% | -11.7% | +11.0% | -9.4% | -7.3% | -6.2% | -12.7% |
Ground | -6.4% | -2.3% | +5.2% | -5.9% | -6.9% | -8.1% | -2.3% | -6.1% | -3.6% | -9.7% | +100.0% | -0.4% | -17.5% | -10.2% | -5.4% | -7.9% | +7.2% | -2.1% | -0.7% |
Ice | -7.0% | -1.0% | +1.5% | -3.3% | -5.1% | -5.9% | -6.6% | -4.9% | -3.0% | -2.7% | -0.4% | +100.0% | -6.3% | -8.5% | -6.5% | -4.2% | -1.7% | -5.7% | +0.0% |
No Type 2 | -15.5% | -15.0% | -13.6% | +3.0% | -4.9% | -5.6% | -2.7% | -35.2% | -13.1% | -9.9% | -17.5% | -6.3% | +100.0% | +8.8% | -14.0% | -4.3% | -18.3% | -19.4% | -1.2% |
Normal | -12.0% | -10.0% | -9.9% | -6.8% | -0.2% | -7.2% | -8.5% | +12.6% | -9.4% | -11.7% | -10.2% | -8.5% | +8.8% | +100.0% | -11.1% | -11.2% | -10.7% | -9.8% | -15.5% |
Poison | +12.1% | -1.8% | -5.6% | -7.5% | -6.6% | -4.0% | -8.5% | -6.8% | +0.9% | +11.0% | -5.4% | -6.5% | -14.0% | -11.1% | +100.0% | -10.3% | -8.1% | -7.4% | -7.4% |
Psychic | -11.2% | -4.4% | -2.7% | -9.2% | +2.7% | +0.1% | -6.1% | -6.4% | -7.1% | -9.4% | -7.9% | -4.2% | -4.3% | -11.2% | -10.3% | +100.0% | -6.9% | +2.5% | -10.0% |
Rock | -0.4% | -3.3% | -3.2% | -7.2% | +0.2% | -5.5% | -6.5% | -4.8% | -6.9% | -7.3% | +7.2% | -1.7% | -18.3% | -10.7% | -8.1% | -6.9% | +100.0% | +4.9% | +1.1% |
Steel | +4.7% | -2.4% | -4.4% | -0.1% | +1.3% | -0.5% | -5.6% | -8.1% | +2.6% | -6.2% | -2.1% | -5.7% | -19.4% | -9.8% | -7.4% | +2.5% | +4.9% | +100.0% | -9.6% |
Water | -12.4% | -2.9% | -8.3% | -6.9% | -6.8% | -7.4% | -11.5% | -9.2% | -7.7% | -12.7% | -0.7% | +0.0% | -1.2% | -15.5% | -7.4% | -10.0% | +1.1% | -9.6% | +100.0% |
compter les co-occurrences (par exemple avec le produit matriciel `pokemons_types.T @ pokemons_types`)
Bug | Dark | Dragon | Electric | Fairy | Fighting | Fire | Flying | Ghost | Grass | Ground | Ice | No Type 2 | Normal | Poison | Psychic | Rock | Steel | Water | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
Bug | 72 | 0 | 0 | 2 | 0 | 2 | 2 | 14 | 1 | 6 | 2 | 0 | 17 | 0 | 13 | 0 | 5 | 7 | 1 |
Dark | 0 | 51 | 3 | 0 | 0 | 3 | 3 | 5 | 3 | 3 | 3 | 2 | 10 | 0 | 3 | 3 | 2 | 2 | 6 |
Dragon | 0 | 3 | 50 | 2 | 1 | 0 | 2 | 8 | 2 | 1 | 7 | 3 | 11 | 0 | 1 | 4 | 2 | 1 | 2 |
Electric | 2 | 0 | 2 | 50 | 1 | 0 | 1 | 5 | 1 | 1 | 1 | 1 | 27 | 2 | 0 | 0 | 0 | 3 | 3 |
Fairy | 0 | 0 | 1 | 1 | 40 | 0 | 0 | 2 | 0 | 2 | 0 | 0 | 15 | 5 | 0 | 6 | 3 | 3 | 2 |
Fighting | 2 | 3 | 0 | 0 | 0 | 53 | 7 | 1 | 0 | 3 | 0 | 0 | 20 | 2 | 2 | 6 | 1 | 3 | 3 |
Fire | 2 | 3 | 2 | 1 | 0 | 7 | 64 | 6 | 3 | 0 | 4 | 0 | 28 | 2 | 0 | 3 | 1 | 1 | 1 |
Flying | 14 | 5 | 8 | 5 | 2 | 1 | 6 | 101 | 2 | 5 | 4 | 2 | 2 | 24 | 3 | 6 | 4 | 1 | 7 |
Ghost | 1 | 3 | 2 | 1 | 0 | 0 | 3 | 2 | 46 | 10 | 2 | 1 | 10 | 0 | 4 | 1 | 0 | 4 | 2 |
Grass | 6 | 3 | 1 | 1 | 2 | 3 | 0 | 5 | 10 | 95 | 1 | 3 | 33 | 2 | 15 | 3 | 2 | 2 | 3 |
Ground | 2 | 3 | 7 | 1 | 0 | 0 | 4 | 4 | 2 | 1 | 67 | 3 | 13 | 1 | 2 | 2 | 9 | 3 | 10 |
Ice | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 2 | 1 | 3 | 3 | 38 | 13 | 0 | 0 | 2 | 2 | 0 | 6 |
No Type 2 | 17 | 10 | 11 | 27 | 15 | 20 | 28 | 2 | 10 | 33 | 13 | 13 | 386 | 61 | 15 | 38 | 9 | 5 | 59 |
Normal | 0 | 0 | 0 | 2 | 5 | 2 | 2 | 24 | 0 | 2 | 1 | 0 | 61 | 102 | 0 | 2 | 0 | 0 | 1 |
Poison | 13 | 3 | 1 | 0 | 0 | 2 | 0 | 3 | 4 | 15 | 2 | 0 | 15 | 0 | 62 | 0 | 0 | 0 | 4 |
Psychic | 0 | 3 | 4 | 0 | 6 | 6 | 3 | 6 | 1 | 3 | 2 | 2 | 38 | 2 | 0 | 90 | 2 | 7 | 5 |
Rock | 5 | 2 | 2 | 0 | 3 | 1 | 1 | 4 | 0 | 2 | 9 | 2 | 9 | 0 | 0 | 2 | 58 | 6 | 10 |
Steel | 7 | 2 | 1 | 3 | 3 | 3 | 1 | 1 | 4 | 2 | 3 | 0 | 5 | 0 | 0 | 7 | 6 | 49 | 1 |
Water | 1 | 6 | 2 | 3 | 2 | 3 | 1 | 7 | 2 | 3 | 10 | 6 | 59 | 1 | 4 | 5 | 10 | 1 | 126 |
visualiser les ressemblances
# Embed the type indicator rows with t-SNE and keep the Pokémon names as index.
tsne = manifold.TSNE(learning_rate='auto', init='random')

# Raw embedding array, one row per Pokémon; labelled below as x / y.
_embedding = tsne.fit_transform(pokemons_types)

pokemons_xy = pd.DataFrame(
    _embedding,
    columns=['x', 'y'],
    index=pokemons_types.index,
)
pokemons_xy
x | y | |
---|---|---|
Name | ||
Bulbasaur | -3.390579 | -21.680622 |
Ivysaur | -3.987710 | -21.019449 |
Venusaur | -3.987710 | -21.019449 |
VenusaurMega Venusaur | -3.390579 | -21.680622 |
Charmander | -1.660779 | 13.174160 |
... | ... | ... |
Diancie | 20.239725 | 17.253794 |
DiancieMega Diancie | 20.239725 | 17.253794 |
HoopaHoopa Confined | -25.158485 | -52.960030 |
HoopaHoopa Unbound | 26.827829 | -9.227021 |
Volcanion | 41.527817 | -61.349205 |
800 rows × 2 columns