기존 XML 문서에 코드 레벨에서 태그(Tag) 및 속성(attribute) 추가
from bs4 import BeautifulSoup

# Parse the existing document. The context manager closes the file even if
# parsing raises.
with open("song.xml") as fp:
    soup = BeautifulSoup(fp, "html.parser")

chanElm = soup.find('channel')

# Build <song sname="sname4"> with <title> and <singer> children.
# To set several attributes at once, pass a dict through the ``attrs``
# keyword: soup.new_tag('song', attrs={'sname': 'sname4', 'mysing': 'my4'})
songElm = soup.new_tag('song', sname='sname4')

titleElm = soup.new_tag('title')
titleElm.string = 'title4'
singerElm = soup.new_tag('singer')
singerElm.string = 'singer4'

songElm.append(titleElm)
songElm.append(singerElm)

# Attach the new <song> as the last child of <channel> and print the tree.
chanElm.append(songElm)
print(soup)
<?xml version="1.0" encoding="UTF-8"?> <rss> <channel> <song sname="sname1"> <title>song1</title> <singer>singer1</singer> </song> <song sname="sname2"> <title>song2</title> <singer>singer2</singer> </song> <song sname="sname3"> <title>song3</title> <singer>singer3</singer> </song> <song sname="sname4"><title>title4</title><singer>singer4</singer></song></channel> </rss> |
JSON Parsing
import json

myList = [{'name': 'aaa', 'age': 20},
          {'name': 'bbb', 'age': 30},
          {'name': 'ccc', 'age': 40}]


def jsonWrite():
    """Serialize ``myList`` to ``myj.json`` and print a confirmation.

    ``json.dump`` writes directly to the file object, so no intermediate
    string is needed. The two-step alternative would be
    ``fp.write(json.dumps(myList))``.
    """
    # The context manager closes the file even if serialization fails.
    with open('myj.json', 'w') as fp:
        json.dump(myList, fp)
    print('json write')
def jsonRead():
    """Load the records stored in ``myj.json`` and print ``name age`` per record.

    ``json.load`` parses directly from the file object. The two-step
    alternative would be ``json.loads(fp.read())``.
    """
    # The context manager closes the file even if parsing fails.
    with open('myj.json', 'r') as fp:
        rd = json.load(fp)
    for n in rd:
        # Each record is a dict, so the %(key)s form picks fields by name.
        print('%(name)s %(age)d' % n)
if __name__ == '__main__':
    # Write the sample records first so that the read step has a file to load.
    jsonWrite()
    jsonRead()

# -----------------------------------------------------------------------------
# Example kept for reference: parsing a nested JSON document from a string
# with json.loads and indexing into the resulting dict.
#
# strJson = '''
# {
#     "snapshot" : {
#         "repos" : "lg.com/repositories/snapshots",
#         "userid" : "lg",
#         "passwd" : "1234"
#     },
#     "release" : {
#         "repos" : "lg.com/release",
#         "userid" : "test",
#         "passwd" : "5678"
#     },
#     "component" : {
#         "test":"lg.com"
#     }
# }
# '''
# myD = json.loads(strJson)
# print(type(myD))
# print(myD['snapshot']['passwd'])
json write aaa 20 bbb 30 ccc 40 |
Python Debugging
# Write log records to a file.
import logging
import os

# Create the log directory if it is missing; exist_ok makes a separate
# existence check unnecessary.
os.makedirs('log', exist_ok=True)

# filemode='a' appends to the existing log file instead of truncating it.
logging.basicConfig(filename='./log/my.log',
                    filemode='a',
                    level=logging.DEBUG,
                    format='[%(asctime)s][%(levelname)s] %(message)s %(lineno)d')

logging.info("program start.......")
logging.critical("my critical")
logging.error('my err')
# logging.warn() is a deprecated alias; logging.warning() is the supported name.
logging.warning('my warnning')
logging.log(logging.WARNING, 'start msg...')

try:
    a = 10 / 0  # deliberately raises ZeroDivisionError to demonstrate logging
    print(a)
    logging.info('adata=%d' % a)
except Exception as err:
    logging.error(err)
|
Python excel 사용하기
pip install XlsxWriter : read/Write 용이한 third party library
import xlsxwriter

# Workbook is used as a context manager: leaving the block calls close(),
# which is the step that writes my.xlsx to disk.
with xlsxwriter.Workbook('my.xlsx') as wb:
    ws1 = wb.add_worksheet('mysheet1')
    ws2 = wb.add_worksheet('mysheet2')

    ws1.write('A1', 10)
    ws1.write('A2', 20)

    ws2.write('A1', 'hello')
    ws2.write('A2', 100)
    ws2.write('A3', 200)
    # A string starting with '=' is written as a formula.
    ws2.write('A4', '=sum(A2:A3)')

print('excel write...')
pip install openpyxl
import openpyxl

# A new workbook starts with one active sheet; rename it and add a second.
wb = openpyxl.Workbook()
ws1 = wb.active
ws1.title = "mysheet1"
ws2 = wb.create_sheet('mysheet2')

ws1['A1'] = 'korea'
ws1['A2'] = 20
ws1.append([10, 20, 30])  # appended as a new row at the bottom of the sheet

ws2['A1'] = 'hello'
ws2['A2'] = 200

wb.save('otes.xlsx')
import openpyxl

wb = openpyxl.load_workbook('otes.xlsx')
# Look the sheet up by title with wb[...]; get_sheet_by_name() is deprecated.
ws = wb['mysheet1']

ws['B1'] = 'python'
print(ws['A1'].value)
print(ws['A2'].value)

# Persist the new B1 value back to the same file.
wb.save('otes.xlsx')
데이터 생성하여 excel 차트 그리기
import openpyxl
from openpyxl.chart import BarChart, LineChart, Series, Reference

wb = openpyxl.Workbook()
ws1 = wb.active
ws2 = wb.create_sheet(title="mysheet2")

# Row 1 is the header row; rows 2-7 hold the values to plot.
ws1.append(['number', 'title', 'title2', 'title3'])
ws1.append([2, 10, 20, 30])
ws1.append([3, 40, 40, 20])
ws1.append([4, 50, 70, 60])
ws1.append([5, 60, 80, 80])
ws1.append([6, 70, 90, 90])
ws1.append([7, 80, 20, 20])

chart1 = BarChart()
chart1.style = 10
chart1.title = "Bar chart"
chart1.x_axis.title = "x test"
chart1.y_axis.title = "y test"

# NOTE(review): min_col=1 also plots the 'number' column as a series even
# though the same column is used for the categories below. If that is not
# intended, use min_col=2. Confirm before changing.
data = Reference(ws1, min_col=1, min_row=1, max_row=7, max_col=4)
cat = Reference(ws1, min_col=1, min_row=2, max_row=7)
# titles_from_data=True takes each series title from the first row of `data`.
chart1.add_data(data, titles_from_data=True)
chart1.set_categories(cat)
chart1.shape = 3
ws1.add_chart(chart1, 'F1')

# Save once, after the chart has been attached to the sheet.
wb.save('mychart.xlsx')
Final Work - 웹로그 분석하기
하기와 같은 Apache Server의 접속 로그가 있다고 가정한다.
0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:51 +0900] "GET / HTTP/1.1" 200 11450 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:51 +0900] "GET /tomcat.png HTTP/1.1" 200 5103 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:51 +0900] "GET /tomcat.css HTTP/1.1" 200 5926 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /bg-nav.png HTTP/1.1" 200 1401 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /bg-middle.png HTTP/1.1" 200 1918 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /bg-button.png HTTP/1.1" 200 713 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /asf-logo.png HTTP/1.1" 200 17811 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /bg-upper.png HTTP/1.1" 200 3103 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /favicon.ico HTTP/1.1" 200 21630 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:42:07 +0900] "GET /host-manager/html HTTP/1.1" 401 2098 0:0:0:0:0:0:0:1 - admin [22/Jul/2016:11:42:12 +0900] "GET /host-manager/html HTTP/1.1" 200 7890 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:42:12 +0900] "GET /host-manager/images/tomcat.gif HTTP/1.1" 200 1934 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:42:12 +0900] "GET /host-manager/images/asf-logo.gif HTTP/1.1" 200 7279 0:0:0:0:0:0:0:1 - admin [22/Jul/2016:11:42:19 +0900] "GET /host-manager/html HTTP/1.1" 200 7758 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:42:22 +0900] "GET / HTTP/1.1" 200 11450 0:0:0:0:0:0:0:1 - admin [22/Jul/2016:11:42:24 +0900] "GET /host-manager/html HTTP/1.1" 200 7758 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:42:34 +0900] "GET / HTTP/1.1" 200 11450 0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:45:35 +0900] "GET /h1.html HTTP/1.1" 200 44 192.168.123.236 - - [22/Jul/2016:11:48:14 +0900] "GET /h3.html HTTP/1.1" 200 44 192.168.123.236 - - [22/Jul/2016:11:48:14 +0900] "GET /favicon.ico HTTP/1.1" 200 21630 192.168.123.241 - - [22/Jul/2016:11:48:15 +0900] "GET /h1.html HTTP/1.1" 200 44 192.168.123.241 - - [22/Jul/2016:11:48:15 +0900] "GET /favicon.ico HTTP/1.1" 200 21630 192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET / HTTP/1.1" 200 
11450 192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /tomcat.css HTTP/1.1" 200 5926 192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /tomcat.png HTTP/1.1" 200 5103 192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /bg-nav.png HTTP/1.1" 200 1401 192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /asf-logo.png HTTP/1.1" 200 17811 192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /bg-upper.png HTTP/1.1" 200 3103 192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /bg-middle.png HTTP/1.1" 200 1918 192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /bg-button.png HTTP/1.1" 200 713 192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /favicon.ico HTTP/1.1" 200 21630 192.168.123.236 - - [22/Jul/2016:11:48:20 +0900] "GET /h2.html HTTP/1.1" 200 44 192.168.123.226 - - [22/Jul/2016:11:48:20 +0900] "GET /h1.html HTTP/1.1" 200 44 192.168.123.226 - - [22/Jul/2016:11:48:20 +0900] "GET /favicon.ico HTTP/1.1" 200 21630 192.168.123.240 - - [22/Jul/2016:11:48:21 +0900] "GET /h2.html HTTP/1.1" 200 44 192.168.123.241 - - [22/Jul/2016:11:48:21 +0900] "GET /h2.html HTTP/1.1" 200 44 192.168.123.235 - - [22/Jul/2016:11:48:23 +0900] "GET /h1.html HTTP/1.1" 200 44 192.168.123.235 - - [22/Jul/2016:11:48:23 +0900] "GET /favicon.ico HTTP/1.1" 200 21630 |
출력 방식은 다음과 같다.
메뉴 출력 웹 로그 분석 1. 로그분석 2. 통계보기 3. 엑셀저장 4. 종료 |
1번 메뉴 선택 ================================ 호스트 : 아이디: 유저네임: 시간: 요청정보: 상태코드: 바이트사이즈 ================================ |
2. 통계분석 총 바이트 전송 사이즈 : 페이지별 전송 사이즈 : h1: h2: h3: h4: .... ================================ id별 방문 횟수 admin : ?? 번? ================================ 총 뷰 횟수(로그라인수) : 페이지별 방문 횟수 h1 : ?? h2 : ?? |
Answer
import re


class View:
    """One parsed access-log line.

    The expected layout is
    ``host identity username [time] "request" status size``.
    Every field is stored as the string found in the line, in the attributes
    ``host``, ``identity``, ``username``, ``time``, ``request``, ``status``
    and ``size``.
    """

    # The host is matched as any run of non-space characters so that IPv6
    # hosts such as 0:0:0:0:0:0:0:1 are captured whole; a digits-and-dots
    # pattern would only pick up the final "1".
    _LINE_RE = re.compile(
        r'(\S+)\s(\S+)\s(\S+)\s\[(.+)\]\s"(.+)"\s(\S+)\s(\S+)')

    def __init__(self, inStr):
        """Parse ``inStr`` and populate the field attributes.

        Raises:
            ValueError: if ``inStr`` does not have the expected layout.
        """
        self.parseStr(inStr)

    def parseStr(self, inStr):
        """Split one log line into its seven fields.

        Args:
            inStr: a single log line; surrounding whitespace is ignored.

        Raises:
            ValueError: if the line does not match the expected layout
                (for example a blank line).
        """
        match = self._LINE_RE.match(inStr.strip())
        if match is None:
            raise ValueError('not a valid access log line: %r' % (inStr,))
        (self.host, self.identity, self.username, self.time,
         self.request, self.status, self.size) = match.groups()
class Statistics:
    """Aggregate statistics over the parsed lines of an access log.

    After construction the following attributes are available:

    * ``views``     - list of ``View`` objects, one per non-blank log line.
    * ``statBytes`` - host -> summed response size, plus a ``'total'`` entry.
    * ``statIp``    - host -> number of requests.
    * ``statUser``  - username -> number of requests (``'-'`` is excluded).
    * ``statPage``  - page -> {host -> hits, ``'total'`` -> hits on that page};
      only ``GET`` requests for ``.html`` pages are counted.
    """

    # Captures the path of a GET request that targets an .html page.
    _PAGE_RE = re.compile(r'GET\s+(.+\.html)\s+HTTP.+')

    def __init__(self, fileName):
        """Read ``fileName`` and compute every statistic."""
        self.views = list()  # list of 'View'
        self.readFile(fileName)
        self.makeStatistics()

    def readFile(self, fileName):
        """Append one ``View`` per non-blank line of ``fileName`` to ``views``."""
        with open(fileName, 'r') as fp:
            for inLine in fp:
                # Blank lines (e.g. a trailing empty line) carry no record.
                if inLine.strip():
                    self.views.append(View(inLine))

    def makeStatisticsBytes(self):
        """Sum the response sizes per host and store a grand total."""
        self.statBytes = dict()
        for aView in self.views:
            try:
                sizeByte = int(aView.size)
            except (TypeError, ValueError):
                # The size field is not a number; skip this record.
                continue
            self.statBytes[aView.host] = (
                self.statBytes.get(aView.host, 0) + sizeByte)
        # Compute the total before the 'total' key itself is inserted.
        totalBytes = sum(self.statBytes.values())
        self.statBytes['total'] = totalBytes

    def makeStatisticsIp(self):
        """Count the requests made by each host."""
        self.statIp = dict()
        for aView in self.views:
            self.statIp[aView.host] = self.statIp.get(aView.host, 0) + 1

    def makeStatisticsUser(self):
        """Count the requests per username, ignoring the '-' placeholder."""
        self.statUser = dict()
        for aView in self.views:
            if aView.username == '-':
                continue
            self.statUser[aView.username] = (
                self.statUser.get(aView.username, 0) + 1)

    def makeStatisticsPage(self):
        """Count hits per .html page, broken down by host, with a total.

        Requests are counted regardless of their status code.
        """
        self.statPage = dict()
        for aView in self.views:
            match = self._PAGE_RE.search(aView.request)
            if not match:
                continue
            perHost = self.statPage.setdefault(match.group(1), dict())
            perHost[aView.host] = perHost.get(aView.host, 0) + 1
        for perHost in self.statPage.values():
            # Sum first, then insert, so 'total' is not counted in itself.
            totalView = sum(perHost.values())
            perHost['total'] = totalView

    def makeStatistics(self):
        """Compute every statistic from ``views``."""
        self.makeStatisticsBytes()
        self.makeStatisticsIp()
        self.makeStatisticsUser()
        self.makeStatisticsPage()

    @staticmethod
    def _printCounts(counts):
        """Print each ``key: count`` pair with the key right-aligned to 20."""
        for anItem in counts:
            print('%20s: %d' % (anItem, counts[anItem]))

    def printResult(self):
        """Print every parsed record followed by each statistics table."""
        print('=' * 80)
        print('Parsing result')
        for aView in self.views:
            print(aView.host, aView.identity, aView.username, aView.time,
                  aView.request, aView.status, aView.size)
        for title, counts in (('Statistics: Bytes', self.statBytes),
                              ('Statistics: IP', self.statIp),
                              ('Statistics: User', self.statUser)):
            print('-' * 80)
            print(title)
            self._printCounts(counts)
        print('-' * 80)
        print('Statistics: Page')
        for aPage in self.statPage:
            print(aPage)
            self._printCounts(self.statPage[aPage])
        print('=' * 80)
if __name__ == '__main__':
    # Parse the access log and print the records followed by the statistics.
    st = Statistics('localhost_access_log.2016-02-26.txt')
    st.printResult()
'Data > Python' 카테고리의 다른 글
Machine Learning #1 python (0) | 2018.09.18 |
---|---|
파이썬 선(禪)(Zen of Python) (0) | 2016.09.22 |
Python 강좌(4) (0) | 2016.07.20 |
Python 강좌(3) (0) | 2016.07.20 |
Python 강좌(2) (0) | 2016.07.19 |