import numpy as np

lista = [1,2,3,4,5] # plain Python list

# Array built from an existing list
array1 = np.array(lista)

# Array built from a range object
array2 = np.array(range(10))

# Array built directly with NumPy's own range function
array3 = np.arange(10)

# The same 100000 integers as a Python list and as a NumPy array
l1 = list(range(100000))
l2 = np.arange(100000)

# Doubles every element with an explicit Python loop (timed by the IPython %time magic)
%time for i in range(len(l1)): l1[i] = l1[i]*2
    
# Doubles every element with one vectorized NumPy expression
%time l2 = l2 * 2

# NOTE(review): the recorded wall time for the NumPy line is 0 ns, i.e. below
# the timer resolution of a single %time run; %timeit would probably give a
# more meaningful comparison.

CPU times: total: 0 ns
Wall time: 9.86 ms
CPU times: total: 0 ns
Wall time: 0 ns

# Integers from 0 to 9 (the stop value, 10, is excluded)
arr1 = np.arange(start=0, stop=10)
arr1

# Integers from 5 to 14
arr2 = np.arange(start=5, stop=15)
arr2

# Values from 5 to 14.5 in steps of 0.5
arr3 = np.arange(start=5, stop=15, step=0.5)
arr3

# Integers from -3 to 9 (no step is given, so the default step of 1 applies)
arr4 = np.arange(start=-3, stop=10)
arr4

array([-3, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9])

# Array holding 10 zeros
arr0 = np.zeros(shape=10)
arr0

# Array holding 10 ones (rebinds arr0, replacing the zeros array)
arr0 = np.ones(shape=10)
arr0

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

# 10 samples drawn from the standard normal distribution
rand_arr1 = np.random.randn(10)
rand_arr1

# 10 random integers drawn uniformly from 0 to 4 (the upper bound, 5, is excluded)
rand_arr2 = np.random.randint(low=0, high=5, size=10)
rand_arr2

# 10 random integers drawn uniformly from 100 to 199 (the upper bound, 200, is excluded)
rand_arr3 = np.random.randint(low=100, high=200, size=10)
rand_arr3

array([103, 178, 101, 191, 110, 172, 136, 102, 172, 171])

# 2-D array built from a list of lists (each inner list becomes one row)
lista_lista = [
    [1, 2, 3],
    [4, 5, 6],
]
nd_arr1 = np.array(lista_lista)
nd_arr1

# 2x3 matrix of standard-normal samples
nd_arr2 = np.random.randn(2, 3)
nd_arr2

# 2x3 matrix of zeros - the dimensions are passed as a single tuple
nd_arr3 = np.zeros(shape=(2, 3))

# 2x3 matrix of ones - same tuple convention; this rebinds nd_arr3
nd_arr3 = np.ones(shape=(2, 3))
nd_arr3

# 5x5 identity matrix
iden = np.identity(5)
iden

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

# 2x3 matrix of zeros - the dimensions are passed as a single tuple
nd_arr3 = np.zeros(shape=(2, 3))
print(nd_arr3)

# .shape is a tuple with one entry per dimension
print(nd_arr3.shape)

# Entry 0 is the number of rows, entry 1 the number of columns
print("Numero de linhas : \n", nd_arr3.shape[0])
print("Numero de colunas : \n", nd_arr3.shape[1])

[[0. 0. 0.]
 [0. 0. 0.]]
(2, 3)
Numero de linhas : 
 2
Numero de colunas : 
 3

# 4x4 matrix of random integers from 2 to 5 (the upper bound, 6, is excluded)
arr4 = np.random.randint(low=2, high=6, size=(4, 4))
print("Aleatorios :\n", arr4)

# Double every element of row 0
arr4[0] *= 2
print("Multiplica linha 0 por 2 : \n" ,arr4)

# Subtract 1 from every element of row 0
arr4[0] -= 1
print("Linha 0 - 1 : \n" ,arr4)

# Square every element of the matrix
arr4 **= 2
print("Todos os elementos^2 : \n" ,arr4)

# Replace row 1 by (row 1 - row 0), element by element
arr4[1] -= arr4[0]
print("Linha 1 =  linha 1 - linha 0 : \n" ,arr4)

Aleatorios :
 [[5 3 2 4]
 [5 5 5 5]
 [5 2 3 2]
 [3 4 5 4]]
Multiplica linha 0 por 2 : 
 [[10  6  4  8]
 [ 5  5  5  5]
 [ 5  2  3  2]
 [ 3  4  5  4]]
Linha 0 - 1 : 
 [[9 5 3 7]
 [5 5 5 5]
 [5 2 3 2]
 [3 4 5 4]]
Todos os elementos^2 : 
 [[81 25  9 49]
 [25 25 25 25]
 [25  4  9  4]
 [ 9 16 25 16]]
Linha 1 =  linha 1 - linha 0 : 
 [[ 81  25   9  49]
 [-56   0  16 -24]
 [ 25   4   9   4]
 [  9  16  25  16]]

def inversa_gauss_jordan(matriz):
    """Return the inverse of a square matrix using Gauss-Jordan elimination.

    The input is copied, so the caller's matrix is left unchanged. Partial
    pivoting (swapping in the row with the largest absolute value in the
    current column) is applied before each normalisation, so a zero on the
    diagonal no longer causes a silent division by zero.

    Parameters
    ----------
    matriz : array-like
        Square 2-D matrix; it is converted to float64.

    Returns
    -------
    numpy.ndarray
        The inverse, with the same shape as the input.

    Raises
    ------
    ValueError
        If the input is not a square 2-D matrix.
    numpy.linalg.LinAlgError
        If a pivot column is entirely zero, i.e. the matrix is singular.
    """
    a = np.array(matriz, dtype=np.float64)  # working copy, reduced to the identity
    if a.ndim != 2 or a.shape[0] != a.shape[1]:
        raise ValueError("matriz must be a square 2-D matrix")

    n = a.shape[0]
    inversa = np.identity(n)  # receives the same row operations as `a`

    for i in range(n):
        # Partial pivoting: bring the largest |value| of column i (rows i..n-1) to row i.
        p = i + int(np.argmax(np.abs(a[i:, i])))
        if a[p, i] == 0.0:
            raise np.linalg.LinAlgError("Singular matrix")
        if p != i:
            a[[i, p]] = a[[p, i]]
            inversa[[i, p]] = inversa[[p, i]]

        # Scale row i so that the pivot becomes 1.
        pivo = a[i, i]
        a[i] /= pivo
        inversa[i] /= pivo

        # Clear column i in every other row.
        for j in range(n):
            if j != i:
                fator = a[j, i]  # read before row j of `a` is modified
                inversa[j] -= fator * inversa[i]
                a[j] -= fator * a[i]

    return inversa


M = np.array([
    [10., 3., 3., 4.],
    [2., 3., 3., 2.],
    [8., 3., 5., 5.],
    [5., 6., 3., 4.],
])
MI = inversa_gauss_jordan(M)
print("Inversa : \n",MI)

Inversa : 
 [[ 0.28125     0.15625    -0.1875     -0.125     ]
 [ 0.13541667  0.26041667 -0.3125      0.125     ]
 [ 0.09375     0.71875    -0.0625     -0.375     ]
 [-0.625      -1.125       0.75        0.5       ]]

# Rebuild the matrix and invert it with NumPy's built-in routine
M = np.array([
    [10., 3., 3., 4.],
    [2., 3., 3., 2.],
    [8., 3., 5., 5.],
    [5., 6., 3., 4.],
])
print("Inversa pelo NumPy : \n", np.linalg.inv(M))

Inversa pelo NumPy : 
 [[ 0.28125     0.15625    -0.1875     -0.125     ]
 [ 0.13541667  0.26041667 -0.3125      0.125     ]
 [ 0.09375     0.71875    -0.0625     -0.375     ]
 [-0.625      -1.125       0.75        0.5       ]]

# Only integer literals are used, so NumPy builds an integer array
M = np.array([
    [10, 3, 3, 4],
    [2, 3, 3, 2],
    [8, 3, 5, 5],
    [5, 6, 3, 4],
])
# The division produces floats, but storing them back into the integer array
# drops the fractional part (see the printed row)
M[0] = M[0] / 10
print(M[0])

[1 0 0 0]

# Prints the dtype NumPy inferred for M (int32 in the recorded output)
print(M.dtype)

int32

# Float literals make NumPy build a float64 array, so the division keeps its fractional part
M = np.array([
    [10., 3., 3., 4.],
    [2., 3., 3., 2.],
    [8., 3., 5., 5.],
    [5., 6., 3., 4.],
])
M[0] /= 10
print(M[0])
print(M.dtype)

[1.  0.3 0.3 0.4]
float64

# Integer literals again, but dtype is set explicitly so the array stores float64 values
M = np.array(
    [
        [10, 3, 3, 4],
        [2, 3, 3, 2],
        [8, 3, 5, 5],
        [5, 6, 3, 4],
    ],
    dtype=np.float64,
)
M[0] /= 10
print(M[0])
print(M.dtype)

[1.  0.3 0.3 0.4]
float64

# Draws 10 values from the standard normal distribution
arr = np.random.randn(10)
print(arr)

# Prints the first 5 values (indices 0 to 4)
print(arr[:5])

# Prints the remaining values, from index 5 onward
print(arr[5:])

# Prints the elements at indices 2 to 5 (the stop index, 6, is excluded)
print(arr[2:6])

[ 1.71467539  0.43702624 -0.4022961   0.89293117  0.23089958 -1.07146443
 -1.69151426 -0.87687647  0.31172282 -0.28296002]
[ 1.71467539  0.43702624 -0.4022961   0.89293117  0.23089958]
[-1.07146443 -1.69151426 -0.87687647  0.31172282 -0.28296002]
[-0.4022961   0.89293117  0.23089958 -1.07146443]

# Array of 10 float64 zeros
arr = np.zeros(10, dtype = np.float64)
print(arr)

# Assigning a scalar to a slice writes it into every selected position of arr
arr[:5] = 10
print("Alterando os valores por fatiamento :",arr)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Alterando os valores por fatiamento : [10. 10. 10. 10. 10.  0.  0.  0.  0.  0.]

# Array of 10 float64 zeros
arr = np.zeros(10, dtype=np.float64)
print(arr)

# .copy() allocates an independent array holding the first 5 elements
copia = arr[:5].copy()
# Write into the copied array itself. A plain `copia = 10` would only rebind
# the name to an int and leave `arr` unchanged even without .copy(), so it
# would not demonstrate anything about copies.
copia[:] = 10
print("Copiando não altera os valores :",arr)

[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
Copiando não altera os valores : [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

M = np.array([[10. ,3. ,3. ,4.],[2. ,3. ,3. ,2.],[8. ,3. ,5. ,5.],[5. ,6. ,3. ,4.]])
print("Matriz original : \n", M)

# Prints every row from index 1 onward
print("Linhas a partir do indice 1 :\n",M[1:])

# From those same rows (index 1 onward), keeps only the columns at indices 0 to 2
print("Colunas até o índice 2, das linhas a partir do indice 1 :\n",M[1:,:3])

Matriz original : 
 [[10.  3.  3.  4.]
 [ 2.  3.  3.  2.]
 [ 8.  3.  5.  5.]
 [ 5.  6.  3.  4.]]
Linhas a partir do indice 1 :
 [[2. 3. 3. 2.]
 [8. 3. 5. 5.]
 [5. 6. 3. 4.]]
Colunas até o índice 2, das linhas a partir do indice 1 :
 [[2. 3. 3.]
 [8. 3. 5.]
 [5. 6. 3.]]

arr_string = np.array(["Dwight", "Michael", "Angela", "Oscar", "Michael", "Angela"])

# Condition: which elements of the array are equal to "Michael"?
# The comparison is applied element by element and yields a boolean array.
arr_bool = arr_string == "Michael"
print(arr_bool)

# Condition: which elements are equal to "Michael" OR "Angela"?
# The two boolean arrays are combined element by element with |.
arr_bool = (arr_string == "Michael") | (arr_string == "Angela")
print(arr_bool)

[False  True False False  True False]
[False  True  True False  True  True]

arr_string = np.array(["Dwight", "Michael", "Angela", "Oscar", "Michael", "Angela"])
arr_booleano = np.array([True,False,False,True,False,False])

# Selects only the elements at positions where arr_booleano is True
print(arr_string[arr_booleano])

['Dwight' 'Oscar']

# Builds a 3x4 matrix of random integers between 5 and 9
ndarray = np.random.randint(5,10, size=(3,4))
ndarray

# Boolean array with one entry per row of the matrix (3 entries)
arr_bool = np.array([True,False,False])

# Prints only the rows of ndarray whose entry in arr_bool is True
print(ndarray[arr_bool])

[[5 6 9 8]]

# One label per row of arr_pesos: the bread type that row's measurements belong to
arr_paes = np.array(["frances","italiano","sirio","frances","sirio"])
arr_pesos = np.array([[3.0,2.8,3.1,3.0,3.23],
              [5.0,5.3,4.95,4.9,5.23],
              [3.0,2.8,3.1,3.0,3.23],
              [6.0,6.8,6.1,6.0,6.23],
              [3.0,2.8,3.1,3.0,3.23]])

# Rows holding measurements of "frances" bread
arr_frances = arr_pesos[arr_paes == "frances"]
print("Linhas pao frances \n", arr_frances)

# Rows holding measurements of "sirio" bread. Each filter result gets its own
# name; storing it in arr_frances mislabelled the data and overwrote the
# previous result.
arr_sirio = arr_pesos[arr_paes == "sirio"]
print("Linhas pao sirio \n", arr_sirio)


# Rows holding measurements of "sirio" OR "frances" bread
arr_sirio_ou_frances = arr_pesos[(arr_paes == "sirio") | (arr_paes == "frances")]
print("Linhas pao sirio ou frances \n", arr_sirio_ou_frances)

Linhas pao frances 
 [[3.   2.8  3.1  3.   3.23]
 [6.   6.8  6.1  6.   6.23]]
Linhas pao sirio 
 [[3.   2.8  3.1  3.   3.23]
 [3.   2.8  3.1  3.   3.23]]
Linhas pao sirio ou frances 
 [[3.   2.8  3.1  3.   3.23]
 [3.   2.8  3.1  3.   3.23]
 [6.   6.8  6.1  6.   6.23]
 [3.   2.8  3.1  3.   3.23]]

# One label per row of arr_pesos
arr_paes = np.array(["frances","italiano","sirio","frances","sirio"])
arr_pesos = np.array([[3.0,2.8,3.1,3.0,3.23],
              [5.0,5.3,4.95,4.9,5.23],
              [3.0,2.8,3.1,3.0,3.23],
              [6.0,6.8,6.1,6.0,6.23],
              [3.0,2.8,3.1,3.0,3.23]])

# Selects the "frances" rows with a boolean mask
arr_frances = arr_pesos[arr_paes == "frances"]
print(arr_frances)
# Overwrites the first selected row with 99 ...
arr_frances[0] = 99
print("Alterando arr_frances \n",arr_frances)

# ... and arr_pesos still prints its original values, i.e. the boolean
# selection produced a separate array rather than a view
print("Não altera arr_pesos \n",arr_pesos)

[[3.   2.8  3.1  3.   3.23]
 [6.   6.8  6.1  6.   6.23]]
Alterando arr_frances 
 [[99.   99.   99.   99.   99.  ]
 [ 6.    6.8   6.1   6.    6.23]]
Não altera arr_pesos 
 [[3.   2.8  3.1  3.   3.23]
 [5.   5.3  4.95 4.9  5.23]
 [3.   2.8  3.1  3.   3.23]
 [6.   6.8  6.1  6.   6.23]
 [3.   2.8  3.1  3.   3.23]]

# Generates 20 random integers (between 10 and 19)
arr_rand = np.random.randint(10,20, size=(20))
print("Valores : \n", arr_rand)

# Sum of all elements
print("Soma : \n", arr_rand.sum())

# Mean
print("Média : \n", arr_rand.mean())

# Standard deviation
print("Desvio padrão : \n", arr_rand.std())

# Variance
print("Variância : \n", arr_rand.var())

# Maximum value
print("Máximo :\n",arr_rand.max())

# Index of the maximum value
print("Indice do Máximo :\n",arr_rand.argmax())

# Cumulative sum: position k holds the sum of elements 0 to k
print("Soma cumulativa :\n", arr_rand.cumsum())

Valores : 
 [18 18 18 19 18 18 14 15 19 19 14 17 11 13 11 17 10 10 15 17]
Soma : 
 311
Média : 
 15.55
Desvio padrão : 
 3.0573681492420897
Variância : 
 9.3475
Máximo :
 19
Indice do Máximo :
 3
Soma cumulativa :
 [ 18  36  54  73  91 109 123 138 157 176 190 207 218 231 242 259 269 279
 294 311]

# 3x4 integer matrix used to show reductions along an axis
arr_m = np.array([[1,1,1,1], 
                  [4,5,6,6],
                  [10,4,3,2]])

# axis=0 averages down the rows: one mean per column (4 values)
print("Média por colunas", arr_m.mean(axis=0))
# axis=1 averages across the columns: one mean per row (3 values)
print("Média por linhas", arr_m.mean(axis=1))

# Without an axis argument the maximum is taken over the whole matrix
print("Maior elemento", arr_m.max())

Média por colunas [5.         3.33333333 3.33333333 3.        ]
Média por linhas [1.   5.25 4.75]
Maior elemento 10

import pandas as pd

# Series built from a list; no index is given, so a default one is created
ser1 = pd.Series([4,3,4,5])
print(ser1)
print(type(ser1))

0    4
1    3
2    4
3    5
dtype: int64
<class 'pandas.core.series.Series'>

# The two parts of a Series: the stored values and the index that labels them
print(ser1.values)
print(ser1.index)

[4 3 4 5]
RangeIndex(start=0, stop=4, step=1)

# Shows the type of the object returned by .values
print(type(ser1.values))

<class 'numpy.ndarray'>

# Series with explicit string labels as its index
ser2 = pd.Series([1,2,3,4], index=["a","b","c","d"])
print(ser2)

a    1
b    2
c    3
d    4
dtype: int64

# Reads the element stored under label "a"
print(ser2["a"])

# Overwrites the element stored under label "a"
ser2["a"] = 999
print(ser2.values)

1
[999   2   3   4]

# Series of strings containing repeated values
sr_string = pd.Series(["P1","P2","P3","P2","P1"])

# Returns only the distinct values, without repetition
sr_string.unique()

array(['P1', 'P2', 'P3'], dtype=object)

ser2 = pd.Series([1,2,3,4], index=["a","b","c","d"])
# Access by position in the underlying array. The plain `ser2[0]` form is
# deprecated on a label-indexed Series (it emits the FutureWarning recorded in
# the output and will be treated as a label lookup in future pandas versions),
# so the positional indexer .iloc is used instead.
ser2.iloc[0]

C:\Users\x-eco\AppData\Local\Temp\ipykernel_11152\4112492939.py:3: FutureWarning: Series.__getitem__ treating keys as positions is deprecated. In a future version, integer keys will always be treated as labels (consistent with DataFrame behavior). To access a value by position, use `ser.iloc[pos]`
  ser2[0]

1

# Access by index label
ser2["a"]

1

# Series whose index labels are the integers 1, 3, 5, 7
ser2 = pd.Series([1,2,3,4], index=[1,3,5,7])
# Intentional failure: 0 is looked up as a label, and no such label exists,
# so this raises the KeyError shown in the recorded traceback
ser2[0] # Error

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexes\base.py:3805, in Index.get_loc(self, key)
   3804 try:
-> 3805     return self._engine.get_loc(casted_key)
   3806 except KeyError as err:

File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()

File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()

File pandas\\_libs\\hashtable_class_helper.pxi:2606, in pandas._libs.hashtable.Int64HashTable.get_item()

File pandas\\_libs\\hashtable_class_helper.pxi:2630, in pandas._libs.hashtable.Int64HashTable.get_item()

KeyError: 0

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[35], line 2
      1 ser2 = pd.Series([1,2,3,4], index=[1,3,5,7])
----> 2 ser2[0] # Erro

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\series.py:1121, in Series.__getitem__(self, key)
   1118     return self._values[key]
   1120 elif key_is_scalar:
-> 1121     return self._get_value(key)
   1123 # Convert generator to list before going through hashable part
   1124 # (We will iterate through the generator there to check for slices)
   1125 if is_iterator(key):

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\series.py:1237, in Series._get_value(self, label, takeable)
   1234     return self._values[label]
   1236 # Similar to Index.get_value, but we do not fall back to positional
-> 1237 loc = self.index.get_loc(label)
   1239 if is_integer(loc):
   1240     return self._values[loc]

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexes\base.py:3812, in Index.get_loc(self, key)
   3807     if isinstance(casted_key, slice) or (
   3808         isinstance(casted_key, abc.Iterable)
   3809         and any(isinstance(x, slice) for x in casted_key)
   3810     ):
   3811         raise InvalidIndexError(key)
-> 3812     raise KeyError(key) from err
   3813 except TypeError:
   3814     # If we have a listlike key, _check_indexing_error will raise
   3815     #  InvalidIndexError. Otherwise we fall through and re-raise
   3816     #  the TypeError.
   3817     self._check_indexing_error(key)

KeyError: 0

# Series whose index labels are the integers 1, 3, 5, 7
ser2 = pd.Series([1,2,3,4], index=[1,3,5,7])


# .iloc selects by position in the underlying array (0 = first, 3 = last here)
print(ser2.iloc[0], ser2.iloc[3])

# .loc selects by the index labels given when the Series was created
print(ser2.loc[1], ser2.loc[7])

1 4
1 4

# DataFrame from a dictionary of lists: the keys become the column names and the lists the column values
dic1 = {"peça1":[1,2,3,4],
        "peça2":[5,2,3,5],
        "peça3":[2,3,4,3]}
dt1 = pd.DataFrame(dic1)
print(dt1)

# DataFrame from a list of tuples: each tuple becomes one row; the columns get default integer names
l_tuplas = [(10,20,30),(40,50,60),(70,80,90)]
dt_tuplas = pd.DataFrame(l_tuplas)
print(dt_tuplas)

   peça1  peça2  peça3
0      1      5      2
1      2      2      3
2      3      3      4
3      4      5      3
    0   1   2
0  10  20  30
1  40  50  60
2  70  80  90

# DataFrame from a dictionary of lists, with explicitly chosen row labels (3, 4, 5, 6)
dic2 = {"peça1":[1,2,3,4],
        "peça2":[5,2,3,5],
        "peça3":[2,3,4,3]}
dt2 = pd.DataFrame(dic2, index = [3,4,5,6])
print(dt2)

   peça1  peça2  peça3
3      1      5      2
4      2      2      3
5      3      3      4
6      4      5      3

# Selecting one column (the result is not the last expression of the cell, so nothing is displayed)
dt1["peça1"]

# Note the type of the selected column:
print("Tipo da coluna :", type(dt1["peça1"]))

# A Series has two parts, values and index, and values is a NumPy array. So a DataFrame column can be
# extracted as an array and handled with everything already covered for NumPy:
arr = dt1["peça1"].values
print(type(arr))

# Using ordinary array methods on the extracted data
print("Soma :",arr.sum())
print("Maximo :",arr.max())
print("Mínimo :",arr.min())

Tipo da coluna : <class 'pandas.core.series.Series'>
<class 'numpy.ndarray'>
Soma : 10
Maximo : 4
Mínimo : 1

# Row whose index label is 0 (dt1 was built without an explicit index)
print(dt1.loc[0])

peça1    1
peça2    5
peça3    2
Name: 0, dtype: int64

# Intentional failure: dt2 was built with row labels 3..6, so label 0 does not
# exist and .loc raises the KeyError shown in the recorded traceback
print(dt2.loc[0])

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexes\base.py:3805, in Index.get_loc(self, key)
   3804 try:
-> 3805     return self._engine.get_loc(casted_key)
   3806 except KeyError as err:

File index.pyx:167, in pandas._libs.index.IndexEngine.get_loc()

File index.pyx:196, in pandas._libs.index.IndexEngine.get_loc()

File pandas\\_libs\\hashtable_class_helper.pxi:2606, in pandas._libs.hashtable.Int64HashTable.get_item()

File pandas\\_libs\\hashtable_class_helper.pxi:2630, in pandas._libs.hashtable.Int64HashTable.get_item()

KeyError: 0

The above exception was the direct cause of the following exception:

KeyError                                  Traceback (most recent call last)
Cell In[41], line 1
----> 1 print(dt2.loc[0])

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexing.py:1191, in _LocationIndexer.__getitem__(self, key)
   1189 maybe_callable = com.apply_if_callable(key, self.obj)
   1190 maybe_callable = self._check_deprecated_callable_usage(key, maybe_callable)
-> 1191 return self._getitem_axis(maybe_callable, axis=axis)

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexing.py:1431, in _LocIndexer._getitem_axis(self, key, axis)
   1429 # fall thru to straight lookup
   1430 self._validate_key(key, axis)
-> 1431 return self._get_label(key, axis=axis)

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexing.py:1381, in _LocIndexer._get_label(self, label, axis)
   1379 def _get_label(self, label, axis: AxisInt):
   1380     # GH#5567 this will fail if the label is not present in the axis.
-> 1381     return self.obj.xs(label, axis=axis)

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\generic.py:4301, in NDFrame.xs(self, key, axis, level, drop_level)
   4299             new_index = index[loc]
   4300 else:
-> 4301     loc = index.get_loc(key)
   4303     if isinstance(loc, np.ndarray):
   4304         if loc.dtype == np.bool_:

File ~\AppData\Local\Programs\Python\Python312\Lib\site-packages\pandas\core\indexes\base.py:3812, in Index.get_loc(self, key)
   3807     if isinstance(casted_key, slice) or (
   3808         isinstance(casted_key, abc.Iterable)
   3809         and any(isinstance(x, slice) for x in casted_key)
   3810     ):
   3811         raise InvalidIndexError(key)
-> 3812     raise KeyError(key) from err
   3813 except TypeError:
   3814     # If we have a listlike key, _check_indexing_error will raise
   3815     #  InvalidIndexError. Otherwise we fall through and re-raise
   3816     #  the TypeError.
   3817     self._check_indexing_error(key)

KeyError: 0

# Positional access: the first row, whatever its label (here the label is 3)
dt2.iloc[0]

peça1    1
peça2    5
peça3    2
Name: 3, dtype: int64

# Shows the type of the object that holds the column names
type(dt1.columns)

pandas.core.indexes.base.Index

# Adds a new column; every row of it is filled with the value 10
dt1["Nova coluna"] = 10
dt1

# Removes the column "peça1" from dt1
del dt1["peça1"]
dt1

# Sorts the rows by the values of column "peça2"; the result is only displayed,
# it is not assigned back to dt1
dt1.sort_values(by="peça2")

# Loads a local CSV file whose fields are separated by ";"
# NOTE(review): absolute path on the author's G: drive - adjust before running elsewhere
caminho = r"G:\Meu Drive\Arquivos\UFPR\1 - Disciplinas\2 - Intro  Mineração de Dados\5-Python\Datasets\dados_filas_SICOOB.csv"
dt = pd.read_csv(caminho,sep = ";")
dt

# Loads a comma-separated CSV directly from a URL
caminho_url = "https://raw.githubusercontent.com/cs109/2014_data/master/countries.csv"
dt_url = pd.read_csv(caminho_url, sep = ",")
dt_url

# The 'r' prefix makes the path a raw string, so its backslashes do not need to be escaped
caminho_excel = r"G:\Meu Drive\Arquivos\UFPR\1 - Disciplinas\2 - Intro  Mineração de Dados\5-Python\Datasets\db_addresses_lat_long.xlsx"
dt_excel = pd.read_excel(caminho_excel)
dt_excel

# Exports dt1 to CSV with the default to_csv options
# NOTE(review): absolute path on the author's G: drive - adjust before running elsewhere
caminho = r"G:\Meu Drive\Arquivos\UFPR\1 - Disciplinas\2 - Intro  Mineração de Dados\5-Python\Arquivo_exportado.csv"
dt1.to_csv(caminho)

dt1

# Writes the same file again, this time with ";" as separator, without the index column,
# and with the "utf-8-sig" encoding
caminho = r"G:\Meu Drive\Arquivos\UFPR\1 - Disciplinas\2 - Intro  Mineração de Dados\5-Python\Arquivo_exportado.csv"
dt1.to_csv(caminho, sep = ";", index = False, encoding = "utf-8-sig")

# Other inspection helpers, left disabled:
#dt1.shape
#dt1.info()
# Summary statistics of the DataFrame's columns
dt1.describe()

# Loads the production log from a local, comma-separated CSV file
# NOTE(review): absolute path on the author's G: drive - adjust before running elsewhere
dt_production = pd.read_csv(r"G:\Meu Drive\Arquivos\UFPR\1 - Disciplinas\2 - Intro  Mineração de Dados\5-Python\Datasets\Production_Data.csv",sep = ",")

# Boolean Series: True on the rows whose "Activity" equals the given string
cond = dt_production["Activity"] == "Turning & Milling - Machine 4"
cond

0        True
1        True
2        True
3        True
4       False
        ...  
4538    False
4539    False
4540    False
4541    False
4542    False
Name: Activity, Length: 4543, dtype: bool

# Keeps only the rows where cond is True
dt_production[cond]

# The same filter with the condition written inline
dt_production[ dt_production["Activity"] == "Turning & Milling - Machine 4"]

# Distinct values found in the "Activity" column
dt_production["Activity"].unique()

array(['Turning & Milling - Machine 4', 'Turning & Milling Q.C.',
       'Laser Marking - Machine 7', 'Lapping - Machine 1',
       'Round Grinding - Machine 3', 'Final Inspection Q.C.', 'Packing',
       'Turning & Milling - Machine 9', 'Turning Q.C.',
       'Flat Grinding - Machine 11', 'Turning & Milling - Machine 8',
       'Grinding Rework - Machine 12', 'Setup - Machine 8',
       'Round Grinding - Machine 12', 'Round Grinding - Manual',
       'Round Grinding - Q.C.', 'Turning & Milling - Machine 5',
       'Turning & Milling - Machine 10', 'Round Grinding - Machine 2',
       'Turning & Milling - Machine 6', 'Turning - Machine 4',
       'Grinding Rework', 'SETUP     Turning & Milling - Machine 5',
       'Final Inspection - Weighting', 'Turning - Machine 9',
       'Deburring - Manual', 'Turning - Machine 8',
       'Wire Cut - Machine 13', 'Wire Cut - Machine 18',
       'Rework Milling - Machine 28', 'Fix EDM', 'Milling Q.C.',
       'Milling - Machine 14', 'Flat Grinding - Machine 26',
       'Grinding Rework - Machine 27', 'Grinding Rework - Machine 2',
       'Fix - Machine 19', 'Round  Q.C.', 'Stress Relief',
       'Turning Rework - Machine 21', 'Milling - Machine 10',
       'Milling - Machine 16', 'Change Version - Machine 22',
       'Turning - Machine 5', 'Round Grinding - Machine 19',
       'Fix - Machine 3', 'Turn & Mill. & Screw Assem - Machine 9',
       'Nitration Q.C.', 'Round Grinding - Machine 23',
       'Fix - Machine 15', 'Turn & Mill. & Screw Assem - Machine 10',
       'Fix - Machine 15M', 'Turning - Machine 21', 'Milling - Machine 8',
       'Setup - Machine 4'], dtype=object)

	Agencia	dia	categoria	Senhas atendimento	time	Até 5 min. de espera	5 a 10 min. de espera	10 a 15 min. de espera	15 a 20 min. de espera	20 a 30 min. de espera	30 a 40 min. de espera	40 a 50 min. de espera	Acima de 50 min. de espera
0	Araras	02/01/2023	3 - A - CAIXA	26	00:04:34	19	2	2	3	0	0	0	0
1	Araras	02/01/2023	4 - B - CAIXA PREFERENCIAL	8	00:01:00	8	0	0	0	0	0	0	0
2	Araras	03/01/2023	3 - A - CAIXA	28	00:04:46	19	4	3	2	0	0	0	0
3	Araras	03/01/2023	4 - B - CAIXA PREFERENCIAL	8	00:02:36	7	1	0	0	0	0	0	0
4	Araras	04/01/2023	3 - A - CAIXA	21	00:03:09	15	5	1	0	0	0	0	0
...	...	...	...	...	...	...	...	...	...	...	...	...	...
4038	PA SALTINHO	29/05/2023	6 - C - CAIXA PREFERENCIAL + 80 ANOS	1	00:08:46	0	1	0	0	0	0	0	0
4039	PA SALTINHO	30/05/2023	3 - A - CAIXA	50	00:05:19	28	11	9	2	0	0	0	0
4040	PA SALTINHO	30/05/2023	4 - B - CAIXA PREFERENCIAL	17	00:03:52	14	1	0	1	1	0	0	0
4041	PA SALTINHO	31/05/2023	3 - A - CAIXA	39	00:04:17	26	9	4	0	0	0	0	0
4042	PA SALTINHO	31/05/2023	4 - B - CAIXA PREFERENCIAL	17	00:04:38	11	3	2	1	0	0	0	0

	Country	Region
0	Algeria	AFRICA
1	Angola	AFRICA
2	Benin	AFRICA
3	Botswana	AFRICA
4	Burkina	AFRICA
...	...	...
189	Paraguay	SOUTH AMERICA
190	Peru	SOUTH AMERICA
191	Suriname	SOUTH AMERICA
192	Uruguay	SOUTH AMERICA
193	Venezuela	SOUTH AMERICA

	Nome	Endereco	Numero	Bairro	Cidade	Estado	Pais	lat	long	Janela de tempo inicial	Janela de tempo final	Tempo de servico	Demanda	Notas
0	ADEMIR JOSï¿½ VIEIRA	RUA LAUDELINO FERREIRA LOPES	229	NOVO MUNDO	Curitiba	Parana	Brasil	-25.506899	-49.304566	2022-05-10 08:00:00	2022-05-10 18:00:00	0	0	NaN
1	ADRIANE ANGERER ULIANA	RUA MURILO DO AMARAL FERREIRA	72	ï¿½GUA VERDE	Curitiba	Parana	Brazil	-25.458970	-49.288281	2022-05-10 08:00:00	2022-05-10 18:00:00	0	1	NaN
2	ALESSANDRO DA SILVA	RUA ARATICUM	214	UBERABA	Curitiba	Parana	Brazil	-25.476699	-49.223430	2022-05-10 08:00:00	2022-05-10 18:00:00	0	1	NaN
3	ALEXANDRE ALMEIDA BLITZKOW	RUA ALFERES ï¿½NGELO SAMPAIO	1495	BATEL	Curitiba	Parana	Brazil	-25.440483	-49.284293	2022-05-10 08:00:00	2022-05-10 18:00:00	0	1	NaN
4	ALEXANDRE AUGUSTO LEAL	RUA JOï¿½O GUARIZA	522	Sï¿½O LOURENï¿½O	Curitiba	Parana	Brazil	-25.391206	-49.265719	2022-05-10 08:00:00	2022-05-10 18:00:00	0	1	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
494	Mï¿½RIO GUIMARï¿½ES FILHO	RUA PADRE ANCHIETA	1205	BIGORRILHO	Curitiba	Parana	Brazil	-25.430058	-49.291104	2022-05-10 08:00:00	2022-05-10 18:00:00	0	1	NaN
495	MARIO HENRIQUE RITZMANN	RUA TAMBAQUIS	715	ALPHAVILLE GRACIOSA	Curitiba	Parana	Brazil	-25.398363	-49.160188	2022-05-10 08:00:00	2022-05-10 18:00:00	0	1	NaN
496	MARLI LOPES REGAGNAN	AVENIDA DOUTOR EUGï¿½NIO BERTOLLI	3062	SANTA FELICIDADE	Curitiba	Parana	Brazil	-25.374206	-49.332474	2022-05-10 08:00:00	2022-05-10 18:00:00	0	1	NaN
497	MATHEUS CARVALHO DOS SANTOS	RUA PROFESSOR ï¿½LVARO JORGE	795	VILA IZABEL	Curitiba	Parana	Brazil	-25.457559	-49.296705	2022-05-10 08:00:00	2022-05-10 18:00:00	0	1	NaN
498	MAURO MULLER GIL CARDOSO	RUA ï¿½NGELO DALLARMI	328	SANTA FELICIDADE	Curitiba	Parana	Brazil	-25.416720	-49.335714	2022-05-10 08:00:00	2022-05-10 18:00:00	0	1	NaN

	peça2	peça3	Nova coluna
count	4.00	4.000000	4.0
mean	3.75	3.000000	10.0
std	1.50	0.816497	0.0
min	2.00	2.000000	10.0
25%	2.75	2.750000	10.0
50%	4.00	3.000000	10.0
75%	5.00	3.250000	10.0
max	5.00	4.000000	10.0

	Case ID	Activity	Resource	Start Timestamp	Complete Timestamp	Span	Work Order Qty	Part Desc.	Worker ID	Report Type	Qty Completed	Qty Rejected	Qty for MRB	Rework
0	Case 1	Turning & Milling - Machine 4	Machine 4 - Turning & Milling	2012/01/29 23:24:00.000	2012/01/30 05:43:00.000	006:19	10	Cable Head	ID4932	S	1	0	0	NaN
1	Case 1	Turning & Milling - Machine 4	Machine 4 - Turning & Milling	2012/01/30 05:44:00.000	2012/01/30 06:42:00.000	000:58	10	Cable Head	ID4932	D	1	0	0	NaN
2	Case 1	Turning & Milling - Machine 4	Machine 4 - Turning & Milling	2012/01/30 06:59:00.000	2012/01/30 07:21:00.000	000:22	10	Cable Head	ID4167	S	0	0	0	NaN
3	Case 1	Turning & Milling - Machine 4	Machine 4 - Turning & Milling	2012/01/30 07:21:00.000	2012/01/30 10:58:00.000	003:37	10	Cable Head	ID4167	D	8	0	0	NaN
126	Case 105	Turning & Milling - Machine 4	Machine 4 - Turning & Milling	2012/03/20 23:18:00.000	2012/03/21 06:34:00.000	007:16	15	Bearing	ID4167	S	0	0	0	NaN
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
4463	Case 95	Turning & Milling - Machine 4	Machine 4 - Turning & Milling	2012/03/02 06:44:00.000	2012/03/02 12:45:00.000	006:01	305	Punch Holder	ID4167	D	14	0	0	NaN
4464	Case 95	Turning & Milling - Machine 4	Machine 4 - Turning & Milling	2012/03/03 12:57:00.000	2012/03/03 19:57:00.000	007:00	305	Punch Holder	ID4529	D	16	0	0	NaN
4465	Case 95	Turning & Milling - Machine 4	Machine 4 - Turning & Milling	2012/03/03 22:23:00.000	2012/03/04 06:47:00.000	008:24	305	Punch Holder	ID4641	D	28	0	0	NaN
4466	Case 95	Turning & Milling - Machine 4	Machine 4 - Turning & Milling	2012/03/04 06:56:00.000	2012/03/04 11:11:00.000	004:15	305	Punch Holder	ID4932	D	53	0	0	NaN
4470	Case 95	Turning & Milling - Machine 4	Machine 4 - Turning & Milling	2012/03/06 08:15:00.000	2012/03/06 08:16:00.000	000:01	305	Punch Holder	ID4932	D	2	0	0	NaN

Aula 4 - NumPy e Pandas I

4.1 NumPy

4.1.1 Array NumPy

4.1.2 Inicialização de arrays

np.arange

np.zeros() e np.ones()

np.random

4.1.3 Arrays multidimensionais (N-dimensional array)

4.1.4 Aritmética com arrays

4.1.5 Fatiamento de arrays

4.1.6 Indexação booleana

4.1.7 Métodos matemáticos e estatísticos

Exercícios I

4.2 Pandas I

4.2.1 Series

Alterando elementos

O método unique()

Acessando elementos da Series

loc[] e iloc[]

4.2.2 DataFrame

Criação de DataFrames

Ordenando

Carregando dados em um DataFrame

Exportando dados de um DataFrame

Filtros e indexação booleana

Exercícios II