10  pandas + numpy + dfply

10.1 pandas

code/py-pandas.py
# https://youtu.be/vmEHCJofslg

import pandas as pd

# Load data/pokemon_data.csv into a DataFrame (path is relative to the working directory).
df = pd.read_csv('data/pokemon_data.csv')
# Alternative loaders for the same data stored in other formats:
# df = pd.read_excel('data/pokemon_data.xlsx')
# df = pd.read_csv('data/pokemon_data.txt', delimiter='\t')

# head and tail: the last / first 5 rows
print(df.tail(5))

print(df.head(5))

# Without print() the result is only echoed in an interactive session
# (REPL / notebook); run as a plain script these two lines show nothing.
df.head(5)
df.tail(5)

# read headers (the column labels)
df.columns

# a specific column
df.Name        # attribute access: only possible when the label is a valid identifier
df['Name']     # a single label returns a Series
df[['Name']]   # a list of labels returns a DataFrame (here with one column)

# specific columns and specific rows
df['Name'][0:5]
df[['Name','Type 1','HP']][0:5]

# specific rows, by position: rows 0, 1 and 2
df.iloc[0:3]

# read from a specific location
# iloc selects rows and columns by integer position
df.iloc[2,1]
df.iloc[0:6, 1:5]

# loc makes a pandas.DataFrame behave much like an R data.frame:
# it selects rows and columns by label (or by a boolean mask)
df.loc[ df.HP > 50, ['Name', 'Type 1', 'HP', 'Speed']]

# NOTE: unlike iloc, a loc slice includes its end label, so 0:5 covers labels 0 through 5.
df.loc[0:5, ['Name', 'Type 1', 'HP', 'Speed']]

# boolean mask first, then the first 5 matching rows
df.loc[df['Type 1']=='Fire'][0:5]

# basic statistics
df.describe()

# sorting (each call returns a new, sorted DataFrame; df itself is not modified)
df.sort_values('Name', ascending=False)
df.sort_values(['Type 1', 'HP'], ascending=True)
# one flag per sort key: 'Type 1' ascending (1), 'HP' descending (0)
df.sort_values(['Type 1', 'HP'], ascending=[1,0])

# making changes to the data
# Bracket assignment is the form that creates a new column.
df['Total'] = df['HP'] + df['Attack'] + df['Defense']
# Attribute assignment only works here because 'Total' already exists;
# on a missing name it would set a plain Python attribute, not a column.
df.Total = df.HP + df.Attack + df.Defense

# delete a column
df = df.drop(columns=['Total'])

# sum columns 4 through 9 (inclusive) for every row;
# iloc's end bound is exclusive, so the slice has to stop at 10
df['Total'] = df.iloc[:, 4:10].sum(axis=1)

# change the order of the columns
# A plain list is required: `+` concatenates lists, whereas on the numpy array
# returned by `.values` it would attempt element-wise addition.
cols = list(df.columns.values)
[cols[0:4], cols[4:13]]
# Move the freshly appended 'Total' (last column) right behind the first four
# columns and keep every other column in its original order.
# NOTE(review): the previous `df[cols[0:4], cols[4:11]]` indexed with a tuple
# of arrays, which pandas rejects; the target order is assumed - confirm.
df = df[cols[0:4] + [cols[-1]] + cols[4:-1]]

# save a data frame
# Every call below overwrites the same file; they show successive option variants.
df.to_csv('modified.csv')                         # default: the index is written as the first column
df.to_csv('modified.csv', index=False)            # leave the index out
df.to_csv('modified.csv', index=False, sep='\t')  # tab-separated (the file name still ends in .csv)

# NOTE: writing .xlsx needs an Excel writer engine (e.g. openpyxl) to be installed
df.to_excel('modified.xlsx', index=False)

# filtering data
# Each comparison needs its own parentheses because & and | bind tighter than == and >.
# rows where both conditions hold
df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison')]
# rows where at least one condition holds
df.loc[(df['Type 1'] == 'Grass') | (df['Type 2'] == 'Poison')]
# three conditions combined with AND
df.loc[(df['Type 1'] == 'Grass') & (df['Type 2'] == 'Poison') & (df['HP'] >70)]

10.2 numpy

code/py-numpy.py
import numpy as np

# 1-D array with an explicit 32-bit integer dtype
a = np.array([1,2,3], dtype='int32')
print(a)

# 2-D array with 2 rows and 3 columns, built from float literals
b = np.array([[9.0,8.0,7.0],[6.0,5.0,4.0]])
print(b)

# Get Dimension (number of axes)
a.ndim

# Get Shape (length along each axis)
b.shape

# Get Type (element dtype)
a.dtype

# Get Size (bytes per element)
a.itemsize

# Get total size (bytes used by all elements: size * itemsize)
a.nbytes

# Get number of elements
a.size

# --------------------
# Accessing/Changing specific elements, rows, columns, etc

a = np.array([[1,2,3,4,5,6,7],[8,9,10,11,12,13,14]])
print(a)

# Get a specific element [r, c]
a[1, 5]

# Get a specific row (both spellings select row 0)
a[0, :]
a[0, ]

# Get a specific column
a[:, 2]

# Getting a little more fancy [startindex:endindex:stepsize]
a[0, 1:-1:2]

# Overwrite a single element
a[1,5] = 20

# Overwrite a whole column: one value per row
a[:,2] = [1,2]
print(a)

# *3-d example
b = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(b)

# Get specific element (work outside in)
b[0,1,1]

# replace
# The target slice b[:, 1, :] has shape (2, 2), so the replacement must be a
# 2x2 block as well; a ragged list such as [[9,9,9],[8,8]] cannot be assigned.
b[:,1,:] = [[9,9],[8,8]]
b

# Initializing Different Types of Arrays
# All 0s matrix (shape is passed as a tuple)
np.zeros((2,3))

# All 1s matrix (here 3-D, with an explicit integer dtype)
np.ones((4,2,2), dtype='int32')

# Any other number: shape first, then the fill value
np.full((2,2), 99)

# Any other number (full_like): reuses the shape and dtype of the existing array `a`
np.full_like(a, 4)

# Random decimal numbers: rand takes the dimensions as separate arguments, not as a tuple
np.random.rand(4,2)

# Random Integer values: lower bound -4 is inclusive, upper bound 8 is exclusive
np.random.randint(-4,8, size=(3,3))

# The identity matrix (5x5)
np.identity(5)

# Stack three copies of a single-row array along the row axis
arr = np.array([[1, 2, 3]])
r1 = arr.repeat(3, axis=0)
print(r1)

# 5x5 frame filled with ones
output = np.ones(shape=(5, 5))
print(output)

# 3x3 block of zeros with a 9 in its centre
z = np.zeros(shape=(3, 3))
z[1][1] = 9
print(z)

# Drop the block into the interior of the frame (everything but the outer ring)
inner = slice(1, -1)
output[inner, inner] = z
print(output)


# Be careful when copying arrays!!!
# .copy() gives b its own data. A plain `b = a` would only bind a second name
# to the same array, and the write below would then show up in `a` as well.
a = np.array([1,2,3])
b = a.copy()
b[0] = 100

print(a)  # a is untouched by the write to b

# --------------------
# Mathematics
a = np.array([1,2,3,4])
print(a)

# Arithmetic with a scalar is applied to every element
a + 2
a - 2
a * 2
a / 2
# Arithmetic between equally shaped arrays is element-wise
b = np.array([1,0,1,0])
a + b
a ** 2

# Take the cosine (element-wise)
np.cos(a)

# Linear Algebra
a = np.ones((2,3))
print(a)

b = np.full((3,2), 2)
print(b)

# Matrix product: (2,3) times (3,2) gives a (2,2) result
np.matmul(a,b)

# Find the determinant
c = np.identity(3)
np.linalg.det(c)

# --------------------
# Statistics

stats = np.array([[1,2,3],[4,5,6]])
stats

# Smallest value of the whole array
np.min(stats)

# Largest value of each row
np.max(stats, axis=1)

# Sum of each column
np.sum(stats, axis=0)

# Reorganizing Arrays
before = np.array([[1,2,3,4],[5,6,7,8]])
print(before)

# The new shape must hold exactly the same number of elements: `before` has 8,
# so (4, 2) works, while a shape such as (2, 3) raises ValueError.
after = before.reshape((4,2))
print(after)

# Vertically stacking vectors (each vector becomes one row)
v1 = np.array([1,2,3,4])
v2 = np.array([5,6,7,8])

np.vstack([v1,v2,v1,v2])

# Horizontal stack (row counts must match; columns are appended)
h1 = np.ones((2,4))
h2 = np.zeros((2,2))

np.hstack((h1,h2))

# --------------------
# Miscellaneous

# Load Data from File
# Reads the comma-separated text file data.txt from the working directory.
filedata = np.genfromtxt('data.txt', delimiter=',')
# NOTE(review): genfromtxt produces floats by default and NaN for fields it
# cannot parse; the cast to int32 drops fractional parts and is not meaningful
# for NaN - confirm the file holds only whole numbers.
filedata = filedata.astype('int32')
print(filedata)

10.3 dfply

code/py-numpy.py
import numpy as np

# 1-D array with an explicit 32-bit integer dtype
a = np.array([1,2,3], dtype='int32')
print(a)

# 2-D array with 2 rows and 3 columns, built from float literals
b = np.array([[9.0,8.0,7.0],[6.0,5.0,4.0]])
print(b)

# Get Dimension (number of axes)
a.ndim

# Get Shape (length along each axis)
b.shape

# Get Type (element dtype)
a.dtype

# Get Size (bytes per element)
a.itemsize

# Get total size (bytes used by all elements: size * itemsize)
a.nbytes

# Get number of elements
a.size

# --------------------
# Accessing/Changing specific elements, rows, columns, etc

a = np.array([[1,2,3,4,5,6,7],[8,9,10,11,12,13,14]])
print(a)

# Get a specific element [r, c]
a[1, 5]

# Get a specific row (both spellings select row 0)
a[0, :]
a[0, ]

# Get a specific column
a[:, 2]

# Getting a little more fancy [startindex:endindex:stepsize]
a[0, 1:-1:2]

# Overwrite a single element
a[1,5] = 20

# Overwrite a whole column: one value per row
a[:,2] = [1,2]
print(a)

# *3-d example
b = np.array([[[1,2],[3,4]],[[5,6],[7,8]]])
print(b)

# Get specific element (work outside in)
b[0,1,1]

# replace
# The target slice b[:, 1, :] has shape (2, 2), so the replacement must be a
# 2x2 block as well; a ragged list such as [[9,9,9],[8,8]] cannot be assigned.
b[:,1,:] = [[9,9],[8,8]]
b

# Initializing Different Types of Arrays
# All 0s matrix (shape is passed as a tuple)
np.zeros((2,3))

# All 1s matrix (here 3-D, with an explicit integer dtype)
np.ones((4,2,2), dtype='int32')

# Any other number: shape first, then the fill value
np.full((2,2), 99)

# Any other number (full_like): reuses the shape and dtype of the existing array `a`
np.full_like(a, 4)

# Random decimal numbers: rand takes the dimensions as separate arguments, not as a tuple
np.random.rand(4,2)

# Random Integer values: lower bound -4 is inclusive, upper bound 8 is exclusive
np.random.randint(-4,8, size=(3,3))

# The identity matrix (5x5)
np.identity(5)

# Stack three copies of a single-row array along the row axis
arr = np.array([[1, 2, 3]])
r1 = arr.repeat(3, axis=0)
print(r1)

# 5x5 frame filled with ones
output = np.ones(shape=(5, 5))
print(output)

# 3x3 block of zeros with a 9 in its centre
z = np.zeros(shape=(3, 3))
z[1][1] = 9
print(z)

# Drop the block into the interior of the frame (everything but the outer ring)
inner = slice(1, -1)
output[inner, inner] = z
print(output)


# Be careful when copying arrays!!!
# .copy() gives b its own data. A plain `b = a` would only bind a second name
# to the same array, and the write below would then show up in `a` as well.
a = np.array([1,2,3])
b = a.copy()
b[0] = 100

print(a)  # a is untouched by the write to b

# --------------------
# Mathematics
a = np.array([1,2,3,4])
print(a)

# Arithmetic with a scalar is applied to every element
a + 2
a - 2
a * 2
a / 2
# Arithmetic between equally shaped arrays is element-wise
b = np.array([1,0,1,0])
a + b
a ** 2

# Take the cosine (element-wise)
np.cos(a)

# Linear Algebra
a = np.ones((2,3))
print(a)

b = np.full((3,2), 2)
print(b)

# Matrix product: (2,3) times (3,2) gives a (2,2) result
np.matmul(a,b)

# Find the determinant
c = np.identity(3)
np.linalg.det(c)

# --------------------
# Statistics

stats = np.array([[1,2,3],[4,5,6]])
stats

# Smallest value of the whole array
np.min(stats)

# Largest value of each row
np.max(stats, axis=1)

# Sum of each column
np.sum(stats, axis=0)

# Reorganizing Arrays
before = np.array([[1,2,3,4],[5,6,7,8]])
print(before)

# The new shape must hold exactly the same number of elements: `before` has 8,
# so (4, 2) works, while a shape such as (2, 3) raises ValueError.
after = before.reshape((4,2))
print(after)

# Vertically stacking vectors (each vector becomes one row)
v1 = np.array([1,2,3,4])
v2 = np.array([5,6,7,8])

np.vstack([v1,v2,v1,v2])

# Horizontal stack (row counts must match; columns are appended)
h1 = np.ones((2,4))
h2 = np.zeros((2,2))

np.hstack((h1,h2))

# --------------------
# Miscellaneous

# Load Data from File
# Reads the comma-separated text file data.txt from the working directory.
filedata = np.genfromtxt('data.txt', delimiter=',')
# NOTE(review): genfromtxt produces floats by default and NaN for fields it
# cannot parse; the cast to int32 drops fractional parts and is not meaningful
# for NaN - confirm the file holds only whole numbers.
filedata = filedata.astype('int32')
print(filedata)