Series, DataFrame, and Index
# In[1]
import numpy as np
import pandas as pd
# In[2]
# Build a one-dimensional Series from a plain list of floats.
data = pd.Series([0.25, 0.5, 0.75, 1.0])
data
# Out[2]
0 0.25
1 0.50
2 0.75
3 1.00
dtype: float64
The values and index attributes
# In[3]
# Show the underlying array of values, then the index object.
print(data.values)
print(data.index)
# Out[3]
[0.25 0.5 0.75 1. ]
RangeIndex(start=0, stop=4, step=1)
# In[4]
# Square-bracket access: first a single element, then a slice.
print(data[1])
print(data[1:3])
# Out[4]
0.5
1 0.50
2 0.75
dtype: float64
# In[5]
# Labels need not be integers: attach an explicit string index to the values.
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
    index=['b', 'a', 'd', 'c'],
)
data
# Out[5]
b 0.25
a 0.50
d 0.75
c 1.00
dtype: float64
# In[6]
# Look up a value by its string label.
data['b']
# Out[6]
0.25
# In[7]
# State populations keyed by state name; the dict keys become the Series index.
population_dict = {
    'California': 39538223,
    'Texas': 29145505,
    'Florida': 21538187,
    'New York': 20201249,
    'Pennsylvania': 13002700,
}
population = pd.Series(population_dict)
population
# Out[7]
California 39538223
Texas 29145505
Florida 21538187
New York 20201249
Pennsylvania 13002700
dtype: int64
# In[8]
# Dictionary-style lookup of a single entry by its key.
population['California']
# Out[8]
39538223
# In[9]
# Unlike a dict, a Series can be sliced by label; the end label 'Florida' is included.
population['California':'Florida']
# Out[9]
California 39538223
Texas 29145505
Florida 21538187
dtype: int64
pd.Series(data, index=index)
index is an optional argument, and data can be one of many entities.
data can be a list or NumPy array, like this:
# In[10]
# A list gets a default integer index starting at 0.
pd.Series([2,4,6])
# Out[10]
0 2
1 4
2 6
dtype: int64
- data can be a scalar, which is repeated to fill the specified index:
# In[11]
# The scalar 5 is repeated once for every label in the given index.
pd.Series(5,index=[100,200,300])
# Out[11]
100 5
200 5
300 5
dtype: int64
# In[12]
# A dictionary supplies both values and index; the index follows the dict's key order.
pd.Series({2:'a',1:'b',3:'c'})
# Out[12]
2 a
1 b
3 c
dtype: object
# In[13]
# An explicit index picks which keys are kept and in what order; key 3 is left out.
pd.Series({2:'a',1:'b',3:'c'},index=[1,2])
# Out[13]
1 b
2 a
dtype: object
# In[14]
# State areas keyed by the same state names used for the population figures.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'Florida': 170312,
    'New York': 141297,
    'Pennsylvania': 119280,
}
area = pd.Series(area_dict)
area
# Out[14]
California 423967
Texas 695662
Florida 170312
New York 141297
Pennsylvania 119280
dtype: int64
# In[15]
# Combine the two Series into one DataFrame, one column per Series,
# with the state names as the shared row labels.
states=pd.DataFrame({'population':population,'area':area})
states
# Out[15]
population area
California 39538223 423967
Texas 29145505 695662
Florida 21538187 170312
New York 20201249 141297
Pennsylvania 13002700 119280
# In[16]
# Row labels of the DataFrame.
states.index
# Out[16]
Index(['California', 'Texas', 'Florida', 'New York', 'Pennsylvania'], dtype='object')
# In[17]
# Column labels, also held in an Index object.
states.columns
# Out[17]
Index(['population', 'area'], dtype='object')
# In[18]
# Indexing a DataFrame by column name returns that column as a Series.
states['area']
# Out[18]
California 423967
Texas 695662
Florida 170312
New York 141297
Pennsylvania 119280
Name: area, dtype: int64
# In[19]
# Build a single-column DataFrame from one Series, naming the column explicitly.
pd.DataFrame(population,columns=['population'])
# Out[19]
population
California 39538223
Texas 29145505
Florida 21538187
New York 20201249
Pennsylvania 13002700
# In[20]
# A list of dicts: each dict becomes one row, each key one column.
data = [dict(a=n, b=2 * n) for n in range(3)]
pd.DataFrame(data)
# Out[20]
a b
0 0 0
1 1 2
2 2 4
Keys missing from a dictionary are filled in with NaN (Not a Number) values.
# In[21]
# The two dicts have different keys; entries absent from a row show up as NaN.
pd.DataFrame([{'a':1,'b':2},{'b':3,'c':4}])
# Out[21]
a b c
0 1.0 2 NaN
1 NaN 3 4.0
# In[22]
# Build a DataFrame from a 3x2 array of random values, with explicit
# column names and row labels (the numbers differ on every run).
pd.DataFrame(np.random.rand(3,2),columns=['foo','bar'],index=['a','b','c'])
# Out[22]
foo bar
a 0.466496 0.888614
b 0.228347 0.613272
c 0.912784 0.961023
# In[23]
# Structured NumPy array with an 8-byte integer field 'A' and an 8-byte float field 'B'.
A = np.zeros(
    3,
    dtype=[('A', 'i8'), ('B', 'f8')],
)
A
# Out[23]
array([(0, 0.), (0, 0.), (0, 0.)], dtype=[('A', '<i8'), ('B', '<f8')])
# In[24]
# A structured array converts directly to a DataFrame, one column per field.
pd.DataFrame(A)
# Out[24]
A B
0 0 0.0
1 0 0.0
2 0 0.0
# In[25]
# Construct an Index directly from a list of integers.
ind = pd.Index([2, 3, 5, 7, 11])
ind
# Out[25]
Int64Index([2, 3, 5, 7, 11], dtype='int64')
# In[26]
# An Index supports array-style indexing and slicing ...
print(ind[1])
print(ind[::2])
# ... and exposes array-like attributes.
print(ind.size, ind.shape, ind.ndim, ind.dtype)
# Out[26]
3
Int64Index([2, 5, 11], dtype='int64')
5 (5,) 1 int64
Index objects follow many of the conventions of Python's built-in set data structure, so that unions, intersections, differences, and other combinations can be computed in a familiar way.
# In[27]
# Two overlapping integer indexes to use in set-style operations.
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])
# In[28]
# Set-style operations between the two indexes:
# elements common to both, elements in either, and elements in exactly one.
print(indA.intersection(indB))
print(indA.union(indB))
print(indA.symmetric_difference(indB))
# Out[28]
Int64Index([3, 5, 7], dtype='int64')
Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')
Int64Index([1, 2, 9, 11], dtype='int64')
글이 많은 도움이 되었습니다, 감사합니다.