travailler les co-occurrences avec pandas
les ingrédients
# Load the Pokémon stats table from a gist URL pinned to a specific revision hash.
pokemons = pd.read_csv(
    filepath_or_buffer='https://gist.githubusercontent.com/armgilles/194bcff35001e7eb53a2a8b441e8b2c6/raw/92200bc0a673d5ce2110aaad4544ed6c4010f687/pokemon.csv',
)
pokemons.set_index('Name')
| | # | Type 1 | Type 2 | Total | HP | Attack | Defense | Sp. Atk | Sp. Def | Speed | Generation | Legendary |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Name | ||||||||||||
| Bulbasaur | 1 | Grass | Poison | 318 | 45 | 49 | 49 | 65 | 65 | 45 | 1 | False |
| Ivysaur | 2 | Grass | Poison | 405 | 60 | 62 | 63 | 80 | 80 | 60 | 1 | False |
| Venusaur | 3 | Grass | Poison | 525 | 80 | 82 | 83 | 100 | 100 | 80 | 1 | False |
| VenusaurMega Venusaur | 3 | Grass | Poison | 625 | 80 | 100 | 123 | 122 | 120 | 80 | 1 | False |
| Charmander | 4 | Fire | NaN | 309 | 39 | 52 | 43 | 60 | 50 | 65 | 1 | False |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Diancie | 719 | Rock | Fairy | 600 | 50 | 100 | 150 | 100 | 150 | 50 | 6 | True |
| DiancieMega Diancie | 719 | Rock | Fairy | 700 | 50 | 160 | 110 | 160 | 110 | 110 | 6 | True |
| HoopaHoopa Confined | 720 | Psychic | Ghost | 600 | 80 | 110 | 60 | 150 | 130 | 70 | 6 | True |
| HoopaHoopa Unbound | 720 | Psychic | Dark | 680 | 80 | 160 | 60 | 170 | 130 | 80 | 6 | True |
| Volcanion | 721 | Fire | Water | 600 | 80 | 110 | 120 | 130 | 90 | 70 | 6 | True |
800 rows × 12 columns
transformer une colonne de listes en une liste de colonnes
# One-hot encode each Pokémon's (Type 1, Type 2) pair: the result has one row
# per Pokémon name and one 0/1 column per distinct type label.
mlb = MultiLabelBinarizer()
pokemons_types = (
    pokemons
    .set_index('Name')
    # Missing values become an explicit 'No Type 2' label so they get their own column.
    .fillna('No Type 2')
    .pipe(
        lambda typed: pd.DataFrame(
            # fit_transform runs first, so mlb.classes_ is populated
            # by the time the column labels are read below.
            mlb.fit_transform(list(zip(typed['Type 1'], typed['Type 2']))),
            columns=mlb.classes_,
            index=typed.index,
        )
    )
)
pokemons_types
| | Bug | Dark | Dragon | Electric | Fairy | Fighting | Fire | Flying | Ghost | Grass | Ground | Ice | No Type 2 | Normal | Poison | Psychic | Rock | Steel | Water |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Name | |||||||||||||||||||
| Bulbasaur | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| Ivysaur | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| Venusaur | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| VenusaurMega Venusaur | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
| Charmander | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| Diancie | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| DiancieMega Diancie | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| HoopaHoopa Confined | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| HoopaHoopa Unbound | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
| Volcanion | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 |
800 rows × 19 columns
visualiser la matrice de corrélation
# Display the pairwise correlation matrix of the one-hot type columns as a
# styled table: every cell is formatted as a signed percentage with one decimal
# and shaded with the 'PiYG' colormap, whose range is pinned to [-1, 1].
# NOTE(review): the closing parenthesis below shares its line with the header
# of the rendered output table (notebook-export artifact).
(
pokemons_types
.corr()
.style
.format('{:+,.1%}'.format)
.background_gradient(
cmap='PiYG',
vmin=-1,
vmax=1
)
)| Bug | Dark | Dragon | Electric | Fairy | Fighting | Fire | Flying | Ghost | Grass | Ground | Ice | No Type 2 | Normal | Poison | Psychic | Rock | Steel | Water | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Bug | +100.0% | -8.2% | -8.1% | -4.5% | -7.2% | -4.9% | -6.1% | +6.5% | -5.9% | -3.4% | -6.4% | -7.0% | -15.5% | -12.0% | +12.1% | -11.2% | -0.4% | +4.7% | -12.4% |
| Dark | -8.2% | +100.0% | -0.4% | -6.7% | -6.0% | -0.8% | -2.0% | -2.2% | +0.1% | -4.8% | -2.3% | -1.0% | -15.0% | -10.0% | -1.8% | -4.4% | -3.3% | -2.4% | -2.9% |
| Dragon | -8.1% | -0.4% | +100.0% | -2.4% | -3.6% | -6.9% | -3.8% | +2.6% | -1.9% | -7.9% | +5.2% | +1.5% | -13.6% | -9.9% | -5.6% | -2.7% | -3.2% | -4.4% | -8.3% |
| Electric | -4.5% | -6.7% | -2.4% | +100.0% | -3.6% | -6.9% | -5.7% | -2.0% | -4.2% | -7.9% | -5.9% | -3.3% | +3.0% | -6.8% | -7.5% | -9.2% | -7.2% | -0.1% | -6.9% |
| Fairy | -7.2% | -6.0% | -3.6% | -3.6% | +100.0% | -6.1% | -6.8% | -5.3% | -5.7% | -4.9% | -6.9% | -5.1% | -4.9% | -0.2% | -6.6% | +2.7% | +0.2% | +1.3% | -6.8% |
| Fighting | -4.9% | -0.8% | -6.9% | -6.9% | -6.1% | +100.0% | +5.1% | -8.6% | -6.6% | -5.1% | -8.1% | -5.9% | -5.6% | -7.2% | -4.0% | +0.1% | -5.5% | -0.5% | -7.4% |
| Fire | -6.1% | -2.0% | -3.8% | -5.7% | -6.8% | +5.1% | +100.0% | -2.9% | -1.3% | -10.8% | -2.3% | -6.6% | -2.7% | -8.5% | -8.5% | -6.1% | -6.5% | -5.6% | -11.5% |
| Flying | +6.5% | -2.2% | +2.6% | -2.0% | -5.3% | -8.6% | -2.9% | +100.0% | -6.2% | -8.1% | -6.1% | -4.9% | -35.2% | +12.6% | -6.8% | -6.4% | -4.8% | -8.1% | -9.2% |
| Ghost | -5.9% | +0.1% | -1.9% | -4.2% | -5.7% | -6.6% | -1.3% | -6.2% | +100.0% | +7.5% | -3.6% | -3.0% | -13.1% | -9.4% | +0.9% | -7.1% | -6.9% | +2.6% | -7.7% |
| Grass | -3.4% | -4.8% | -7.9% | -7.9% | -4.9% | -5.1% | -10.8% | -8.1% | +7.5% | +100.0% | -9.7% | -2.7% | -9.9% | -11.7% | +11.0% | -9.4% | -7.3% | -6.2% | -12.7% |
| Ground | -6.4% | -2.3% | +5.2% | -5.9% | -6.9% | -8.1% | -2.3% | -6.1% | -3.6% | -9.7% | +100.0% | -0.4% | -17.5% | -10.2% | -5.4% | -7.9% | +7.2% | -2.1% | -0.7% |
| Ice | -7.0% | -1.0% | +1.5% | -3.3% | -5.1% | -5.9% | -6.6% | -4.9% | -3.0% | -2.7% | -0.4% | +100.0% | -6.3% | -8.5% | -6.5% | -4.2% | -1.7% | -5.7% | +0.0% |
| No Type 2 | -15.5% | -15.0% | -13.6% | +3.0% | -4.9% | -5.6% | -2.7% | -35.2% | -13.1% | -9.9% | -17.5% | -6.3% | +100.0% | +8.8% | -14.0% | -4.3% | -18.3% | -19.4% | -1.2% |
| Normal | -12.0% | -10.0% | -9.9% | -6.8% | -0.2% | -7.2% | -8.5% | +12.6% | -9.4% | -11.7% | -10.2% | -8.5% | +8.8% | +100.0% | -11.1% | -11.2% | -10.7% | -9.8% | -15.5% |
| Poison | +12.1% | -1.8% | -5.6% | -7.5% | -6.6% | -4.0% | -8.5% | -6.8% | +0.9% | +11.0% | -5.4% | -6.5% | -14.0% | -11.1% | +100.0% | -10.3% | -8.1% | -7.4% | -7.4% |
| Psychic | -11.2% | -4.4% | -2.7% | -9.2% | +2.7% | +0.1% | -6.1% | -6.4% | -7.1% | -9.4% | -7.9% | -4.2% | -4.3% | -11.2% | -10.3% | +100.0% | -6.9% | +2.5% | -10.0% |
| Rock | -0.4% | -3.3% | -3.2% | -7.2% | +0.2% | -5.5% | -6.5% | -4.8% | -6.9% | -7.3% | +7.2% | -1.7% | -18.3% | -10.7% | -8.1% | -6.9% | +100.0% | +4.9% | +1.1% |
| Steel | +4.7% | -2.4% | -4.4% | -0.1% | +1.3% | -0.5% | -5.6% | -8.1% | +2.6% | -6.2% | -2.1% | -5.7% | -19.4% | -9.8% | -7.4% | +2.5% | +4.9% | +100.0% | -9.6% |
| Water | -12.4% | -2.9% | -8.3% | -6.9% | -6.8% | -7.4% | -11.5% | -9.2% | -7.7% | -12.7% | -0.7% | +0.0% | -1.2% | -15.5% | -7.4% | -10.0% | +1.1% | -9.6% | +100.0% |
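compter les co-occurrences (le tableau ci-dessous correspond vraisemblablement au produit matriciel `pokemons_types.T @ pokemons_types` : la diagonale donne le nombre de pokémons par type)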
| | Bug | Dark | Dragon | Electric | Fairy | Fighting | Fire | Flying | Ghost | Grass | Ground | Ice | No Type 2 | Normal | Poison | Psychic | Rock | Steel | Water |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Bug | 72 | 0 | 0 | 2 | 0 | 2 | 2 | 14 | 1 | 6 | 2 | 0 | 17 | 0 | 13 | 0 | 5 | 7 | 1 |
| Dark | 0 | 51 | 3 | 0 | 0 | 3 | 3 | 5 | 3 | 3 | 3 | 2 | 10 | 0 | 3 | 3 | 2 | 2 | 6 |
| Dragon | 0 | 3 | 50 | 2 | 1 | 0 | 2 | 8 | 2 | 1 | 7 | 3 | 11 | 0 | 1 | 4 | 2 | 1 | 2 |
| Electric | 2 | 0 | 2 | 50 | 1 | 0 | 1 | 5 | 1 | 1 | 1 | 1 | 27 | 2 | 0 | 0 | 0 | 3 | 3 |
| Fairy | 0 | 0 | 1 | 1 | 40 | 0 | 0 | 2 | 0 | 2 | 0 | 0 | 15 | 5 | 0 | 6 | 3 | 3 | 2 |
| Fighting | 2 | 3 | 0 | 0 | 0 | 53 | 7 | 1 | 0 | 3 | 0 | 0 | 20 | 2 | 2 | 6 | 1 | 3 | 3 |
| Fire | 2 | 3 | 2 | 1 | 0 | 7 | 64 | 6 | 3 | 0 | 4 | 0 | 28 | 2 | 0 | 3 | 1 | 1 | 1 |
| Flying | 14 | 5 | 8 | 5 | 2 | 1 | 6 | 101 | 2 | 5 | 4 | 2 | 2 | 24 | 3 | 6 | 4 | 1 | 7 |
| Ghost | 1 | 3 | 2 | 1 | 0 | 0 | 3 | 2 | 46 | 10 | 2 | 1 | 10 | 0 | 4 | 1 | 0 | 4 | 2 |
| Grass | 6 | 3 | 1 | 1 | 2 | 3 | 0 | 5 | 10 | 95 | 1 | 3 | 33 | 2 | 15 | 3 | 2 | 2 | 3 |
| Ground | 2 | 3 | 7 | 1 | 0 | 0 | 4 | 4 | 2 | 1 | 67 | 3 | 13 | 1 | 2 | 2 | 9 | 3 | 10 |
| Ice | 0 | 2 | 3 | 1 | 0 | 0 | 0 | 2 | 1 | 3 | 3 | 38 | 13 | 0 | 0 | 2 | 2 | 0 | 6 |
| No Type 2 | 17 | 10 | 11 | 27 | 15 | 20 | 28 | 2 | 10 | 33 | 13 | 13 | 386 | 61 | 15 | 38 | 9 | 5 | 59 |
| Normal | 0 | 0 | 0 | 2 | 5 | 2 | 2 | 24 | 0 | 2 | 1 | 0 | 61 | 102 | 0 | 2 | 0 | 0 | 1 |
| Poison | 13 | 3 | 1 | 0 | 0 | 2 | 0 | 3 | 4 | 15 | 2 | 0 | 15 | 0 | 62 | 0 | 0 | 0 | 4 |
| Psychic | 0 | 3 | 4 | 0 | 6 | 6 | 3 | 6 | 1 | 3 | 2 | 2 | 38 | 2 | 0 | 90 | 2 | 7 | 5 |
| Rock | 5 | 2 | 2 | 0 | 3 | 1 | 1 | 4 | 0 | 2 | 9 | 2 | 9 | 0 | 0 | 2 | 58 | 6 | 10 |
| Steel | 7 | 2 | 1 | 3 | 3 | 3 | 1 | 1 | 4 | 2 | 3 | 0 | 5 | 0 | 0 | 7 | 6 | 49 | 1 |
| Water | 1 | 6 | 2 | 3 | 2 | 3 | 1 | 7 | 2 | 3 | 10 | 6 | 59 | 1 | 4 | 5 | 10 | 1 | 126 |
visualiser les ressemblances
# Embed the one-hot type vectors into two coordinates (x, y) with t-SNE,
# keeping the Pokémon names as the index so points can be labelled.
# random_state is pinned because init='random' otherwise produces a different
# layout on every run, which makes the notebook output non-reproducible.
tsne = manifold.TSNE(learning_rate='auto', init='random', random_state=0)
pokemons_xy = pd.DataFrame(
    tsne.fit_transform(pokemons_types),
    columns=['x', 'y'],
    index=pokemons_types.index,
)
pokemons_xy
| | x | y |
|---|---|---|
| Name | ||
| Bulbasaur | -3.390579 | -21.680622 |
| Ivysaur | -3.987710 | -21.019449 |
| Venusaur | -3.987710 | -21.019449 |
| VenusaurMega Venusaur | -3.390579 | -21.680622 |
| Charmander | -1.660779 | 13.174160 |
| ... | ... | ... |
| Diancie | 20.239725 | 17.253794 |
| DiancieMega Diancie | 20.239725 | 17.253794 |
| HoopaHoopa Confined | -25.158485 | -52.960030 |
| HoopaHoopa Unbound | 26.827829 | -9.227021 |
| Volcanion | 41.527817 | -61.349205 |
800 rows × 2 columns
