[Python] 파이썬 데이터로 파이썬 차트 개발 하기(Column, Bar, Hue)
데이터 Pandas, 차트 matplotlib
csv 데이터 불러오기
import pandas as pd
.read_csv()
import pandas as pd
import matplotlib as mpl
import matplotlib.font_manager as fm
from matplotlib import rc
import matplotlib.pyplot as plt
import seaborn as sns
# Relative path of the CSV file to load.
file = "./data/data_2020.csv"
# Parse the CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=file)
데이터 .head() 확인
# Take the leading rows (head() returns the first 5 by default) and show them.
data_Head = data.head(n=5)
print(data_Head)
집계일자 집계시 출발영업소코드 도착영업소코드 통행시간 요일
0 20200101 4 101 105 637 2
1 20200101 4 101 105 773 2
2 20200101 4 101 105 762 2
3 20200101 4 101 105 746 2
4 20200101 4 101 105 875 2
데이터 .info() 확인
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() would only add a stray
# "None" line after the report.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 152870 entries, 0 to 152869
Data columns (total 6 columns):
집계일자 152870 non-null int64
집계시 152870 non-null int64
출발영업소코드 152870 non-null int64
도착영업소코드 152870 non-null int64
통행시간 152870 non-null int64
요일 152870 non-null int64
dtypes: int64(6)
memory usage: 7.0 MB
데이터 선택해서 가져오기
선택 조건: 집계시 값이 14(14시)인 행만 가져온다.
# Boolean mask: True for rows whose 집계시 column equals 14.
is_2pm = data['집계시'] == 14
# Keep only the rows selected by the mask.
data_2PM = data[is_2pm]
집계일자 집계시 출발영업소코드 도착영업소코드 통행시간 요일
40 20200101 14 101 105 1009 2
41 20200101 14 101 105 949 2
42 20200101 14 101 105 854 2
43 20200101 14 101 105 1033 2
44 20200101 14 101 105 994 2
154 20200101 14 101 110 3383 2
155 20200101 14 101 110 3353 2
190 20200101 14 101 115 5772 2
191 20200101 14 101 115 4782 2
192 20200101 14 101 115 5659 2
236 20200101 14 101 120 7836 2
248 20200101 14 101 125 8752 2
258 20200101 14 101 130 9695 2
277 20200101 14 101 135 14139 2
306 20200101 14 101 140 17933 2
358 20200102 14 101 105 901 3
359 20200102 14 101 105 891 3
360 20200102 14 101 105 741 3
361 20200102 14 101 105 858 3
362 20200102 14 101 105 910 3
363 20200102 14 101 105 830 3
499 20200102 14 101 110 3449 3
500 20200102 14 101 110 3473 3
501 20200102 14 101 110 3459 3
550 20200102 14 101 115 6073 3
551 20200102 14 101 115 4940 3
552 20200102 14 101 115 5721 3
596 20200101 14 101 110 3424 2
620 20200102 14 101 120 7905 3
658 20200101 14 101 110 4404 2
... ... ... ... ... ... ..
152060 20201220 14 101 110 3668 6
152066 20201219 14 101 105 991 5
152089 20201228 14 101 110 3826 0
152101 20201220 14 101 140 15626 6
152124 20201229 14 101 135 12674 1
152253 20201231 14 101 105 925 3
152254 20201231 14 101 105 935 3
152255 20201231 14 101 105 801 3
152256 20201231 14 101 105 1093 3
152257 20201231 14 101 105 974 3
152275 20201231 14 101 110 3589 3
152276 20201231 14 101 110 3965 3
152279 20201231 14 101 110 3523 3
152343 20201229 14 101 110 4033 1
152403 20201231 14 101 115 6502 3
152405 20201231 14 101 115 4989 3
152407 20201231 14 101 115 5693 3
152414 20201231 14 101 120 7952 3
152440 20201231 14 101 125 9941 3
152444 20201231 14 101 130 8528 3
152467 20201231 14 101 135 13755 3
152484 20201231 14 101 140 15866 3
152539 20201229 14 101 110 4459 1
152577 20201230 14 101 105 902 2
152588 20201231 14 101 110 3734 3
152618 20201222 14 101 135 17402 1
152646 20201231 14 101 110 3640 3
152671 20201223 14 101 135 14364 2
152690 20201231 14 101 110 4019 3
152814 20201228 14 101 135 17625 0
[7091 rows x 6 columns]
그래프그리기
임포트
import matplotlib.pyplot as plt
import matplotlib as mpl
# Create a wide figure for the chart.
plt.figure(figsize=(20, 10))
# Count the rows per 도착영업소코드 value and draw them as vertical bars.
# The column is passed as x=...: seaborn 0.12+ accepts only `data`
# positionally, so the old positional form raises a TypeError.
data_2PM_Destination = sns.countplot(x='도착영업소코드', data=data_2PM)
# NOTE(review): countplot plots the number of rows per category, so the
# value axis is a count; the '통행시간' wording below may be misleading — confirm.
data_2PM_Destination.set_title('통행시간 by 도착영업소', fontsize=18)
data_2PM_Destination.set_xlabel('도착영업소', fontdict={'size':16})
data_2PM_Destination.set_ylabel('통행시간', fontdict={'size':16})
plt.show()
한글폰트 사용하기 (한글폰트설치)
임포트
import matplotlib.font_manager as fm
from matplotlib import rc
폰트 정보 확인하기
# Collect the paths of the TrueType fonts installed on this system.
font_list = fm.findSystemFonts(fontext='ttf', fontpaths=None)
# Report how many ttf font files were found.
font_count = len(font_list)
print(font_count)
1897
# Find the Nanum (Korean) fonts known to matplotlib's font manager and
# print each one's font name together with its file location.
fonts = [(f.name, f.fname) for f in fm.fontManager.ttflist if 'Nanum' in f.name]
for font_entry in fonts:
    # The loop body must be indented; unindented it is an IndentationError.
    print(font_entry)
('NanumBarunpen', 'C:\\Windows\\Fonts\\NanumBarunpenB.ttf')
('NanumMyeongjo', 'C:\\Windows\\Fonts\\NanumMyeongjo.ttf')
('NanumMyeongjo', 'C:\\Windows\\Fonts\\NanumMyeongjoBold.ttf')
('NanumGothic', 'C:\\Windows\\Fonts\\NanumGothicExtraBold.ttf')
('NanumGothic', 'C:\\Windows\\Fonts\\NanumGothicLight.ttf')
('NanumBarunpen', 'C:\\Windows\\Fonts\\NanumBarunpenR.ttf')
('NanumGothic', 'C:\\Windows\\Fonts\\NanumGothicBold.ttf')
('NanumGothic', 'C:\\Windows\\Fonts\\NanumGothic.ttf')
('NanumBarunpenOTF', 'C:\\Windows\\Fonts\\NanumBarunpenR.otf')
('NanumSquare', 'C:\\Windows\\Fonts\\NanumSquareR.ttf')
('NanumBarunGothic', 'C:\\Windows\\Fonts\\NanumBarunGothic.ttf')
('NanumSquare', 'C:\\Windows\\Fonts\\NanumSquareB.ttf')
('NanumBarunGothic', 'C:\\Windows\\Fonts\\NanumBarunGothicLight.ttf')
('Nanum Pen Script', 'C:\\Windows\\Fonts\\NanumPen.ttf')
('NanumMyeongjo', 'C:\\Windows\\Fonts\\NanumMyeongjoExtraBold.ttf')
('NanumBarunGothic', 'C:\\Windows\\Fonts\\NanumBarunGothicUltraLight.ttf')
('Nanum Brush Script', 'C:\\Windows\\Fonts\\NanumBrush.ttf')
('NanumBarunpenOTF', 'C:\\Windows\\Fonts\\NanumBarunpenB.otf')
('NanumBarunGothic', 'C:\\Windows\\Fonts\\NanumBarunGothicBold.ttf')
한글폰트 적용
# Important: this must run before the plt.show() call of any chart that
# uses Korean text, so the NanumGothic family is in effect for it.
rc('font', **{'family': "NanumGothic"})
전체코드
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.font_manager as fm
from matplotlib import rc
import seaborn as sns
# Load the CSV file into a DataFrame.
file = "./data/data_2020.csv"
data = pd.read_csv(file)

# Preview the first rows.
data_Head = data.head()
print(data_Head)

# DataFrame.info() prints its summary itself and returns None, so it is
# called directly instead of being wrapped in print() (which emits "None").
data.info()

# Keep only the rows whose 집계시 column equals 14.
data_2PM = data[data.집계시 == 14]
print(data_2PM)
data_2PM.info()

# Important: configure the Korean font before plt.show() so the Hangul
# title and axis labels use it.
rc('font', family="NanumGothic")
# Render minus signs with an ASCII hyphen instead of the Unicode minus glyph.
mpl.rcParams['axes.unicode_minus'] = False

# Detailed font settings
mpl.rcParams["font.size"] = 12
mpl.rcParams["font.family"] = 'NanumGothic'
mpl.rcParams['xtick.labelsize'] = 12
mpl.rcParams['ytick.labelsize'] = 12

plt.figure(figsize=(20, 10))
# Count rows per 도착영업소코드 and draw vertical bars. The column is passed
# as x=...: seaborn 0.12+ accepts only `data` positionally, so the old
# positional form raises a TypeError.
data_2PM_Destination = sns.countplot(x='도착영업소코드', data=data_2PM)
# NOTE(review): countplot plots the number of rows per category, so the
# value axis is a count; the '통행시간' wording below may be misleading — confirm.
data_2PM_Destination.set_title('통행시간 by 도착영업소', fontsize=18)
data_2PM_Destination.set_xlabel('도착영업소', fontdict={'size':16})
data_2PM_Destination.set_ylabel('통행시간', fontdict={'size':16})
plt.show()
Bar 차트로 바꿀 경우
y=
# Passing the column as y= (instead of x=) lays the bars out horizontally.
data_2PM_Destination = sns.countplot(data=data_2PM, y='도착영업소코드')
data_2PM_Destination.set_title('통행시간 by 도착영업소', fontsize=18)
# The axis labels are swapped relative to the vertical (column) version.
data_2PM_Destination.set_ylabel('도착영업소', fontsize=16)
data_2PM_Destination.set_xlabel('통행시간', fontsize=16)
plt.show()
전체코드
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.font_manager as fm
from matplotlib import rc
import seaborn as sns
# Load the CSV file into a DataFrame.
file = "./data/data_2020.csv"
data = pd.read_csv(file)

# Preview the first rows.
data_Head = data.head()
print(data_Head)

# DataFrame.info() prints its summary itself and returns None, so it is
# called directly instead of being wrapped in print() (which emits "None").
data.info()

# Keep only the rows whose 집계시 column equals 14.
data_2PM = data[data.집계시 == 14]
print(data_2PM)
data_2PM.info()

# Important: configure the Korean font before plt.show() so the Hangul
# title and axis labels use it.
rc('font', family="NanumGothic")
# Render minus signs with an ASCII hyphen instead of the Unicode minus glyph.
mpl.rcParams['axes.unicode_minus'] = False

# Detailed font settings
mpl.rcParams["font.size"] = 12
mpl.rcParams["font.family"] = 'NanumGothic'
mpl.rcParams['xtick.labelsize'] = 12
mpl.rcParams['ytick.labelsize'] = 12

plt.figure(figsize=(20, 10))
# Passing the column as y= draws horizontal bars (one per 도착영업소코드 value).
data_2PM_Destination = sns.countplot(y='도착영업소코드', data=data_2PM)
# NOTE(review): countplot plots the number of rows per category, so the
# value axis is a count; the '통행시간' wording below may be misleading — confirm.
data_2PM_Destination.set_title('통행시간 by 도착영업소', fontsize=18)
data_2PM_Destination.set_xlabel('통행시간', fontdict={'size':16})
data_2PM_Destination.set_ylabel('도착영업소', fontdict={'size':16})
plt.show()
분화하기 Hue
hue=
상황) 요일별로 분화
# hue='요일' splits each destination's bar into one bar per 요일 value.
# The column is passed as x=...: seaborn 0.12+ accepts only `data`
# positionally, so the old positional form raises a TypeError.
data_2PM_Destination = sns.countplot(x='도착영업소코드', data=data_2PM, hue='요일')
# NOTE(review): countplot plots the number of rows per category, so the
# value axis is a count; the '통행시간' wording below may be misleading — confirm.
data_2PM_Destination.set_title('통행시간 by 도착영업소', fontsize=18)
data_2PM_Destination.set_xlabel('도착영업소', fontdict={'size':16})
data_2PM_Destination.set_ylabel('통행시간', fontdict={'size':16})
전체코드
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.font_manager as fm
from matplotlib import rc
import seaborn as sns
# Load the CSV file into a DataFrame.
file = "./data/data_2020.csv"
data = pd.read_csv(file)

# Preview the first rows.
data_Head = data.head()
print(data_Head)

# DataFrame.info() prints its summary itself and returns None, so it is
# called directly instead of being wrapped in print() (which emits "None").
data.info()

# Keep only the rows whose 집계시 column equals 14.
data_2PM = data[data.집계시 == 14]
print(data_2PM)
data_2PM.info()

# Important: configure the Korean font before plt.show() so the Hangul
# title and axis labels use it.
rc('font', family="NanumGothic")
# Render minus signs with an ASCII hyphen instead of the Unicode minus glyph.
mpl.rcParams['axes.unicode_minus'] = False

# Detailed font settings
mpl.rcParams["font.size"] = 12
mpl.rcParams["font.family"] = 'NanumGothic'
mpl.rcParams['xtick.labelsize'] = 12
mpl.rcParams['ytick.labelsize'] = 12

plt.figure(figsize=(20, 10))
# hue='요일' splits each destination's bar into one bar per 요일 value.
# The column is passed as x=...: seaborn 0.12+ accepts only `data`
# positionally, so the old positional form raises a TypeError.
data_2PM_Destination = sns.countplot(x='도착영업소코드', data=data_2PM, hue='요일')
# NOTE(review): countplot plots the number of rows per category, so the
# value axis is a count; the '통행시간' wording below may be misleading — confirm.
data_2PM_Destination.set_title('통행시간 by 도착영업소', fontsize=18)
data_2PM_Destination.set_xlabel('도착영업소', fontdict={'size':16})
data_2PM_Destination.set_ylabel('통행시간', fontdict={'size':16})
plt.show()
댓글