본문 바로가기

Data/Python

Python 강좌(5)

기존 XML 문서에 코드 레벨에서 태그(Tag) 및 속성(attribute) 추가




from bs4 import BeautifulSoup


# Parse the existing document. The ``with`` block closes the file as soon as
# parsing is done, and the encoding is pinned to UTF-8 instead of relying on
# the platform default.
with open("song.xml", encoding="utf-8") as fp:
    soup = BeautifulSoup(fp, "html.parser")

# Element that will receive the new song entry.
chanElm = soup.find('channel')

# Build a new song element carrying the attribute sname="sname4".
songElm = soup.new_tag('song', sname='sname4')

titleElm = soup.new_tag('title')
titleElm.string = 'title4'

singerElm = soup.new_tag('singer')
singerElm.string = 'singer4'

# Assemble the children first, then attach the finished element to the
# channel as its last child.
songElm.append(titleElm)
songElm.append(singerElm)
chanElm.append(songElm)

print(soup)

# Several attributes can be supplied at once through the ``attrs`` keyword
# (a dict passed positionally would be taken as the namespace instead):
# soup.new_tag('song', attrs={'sname': 'sname4', 'mysing': 'my4'})



<?xml version="1.0" encoding="UTF-8"?>

<rss>

<channel>

<song sname="sname1">

<title>song1</title>

<singer>singer1</singer>

</song>

<song sname="sname2">

<title>song2</title>

<singer>singer2</singer>

</song>

<song sname="sname3">

<title>song3</title>

<singer>singer3</singer>

</song>

<song sname="sname4"><title>title4</title><singer>singer4</singer></song></channel>

</rss>





JSON Parsing




import json


# Sample records used by the JSON write/read demo below.
myList = [
    {'name': person, 'age': years}
    for person, years in (('aaa', 20), ('bbb', 30), ('ccc', 40))
]


def jsonWrite():
    """Serialise the module-level ``myList`` to the file ``myj.json``.

    Prints ``json write`` once the file has been written.
    """
    # Two-step alternative: json.dumps() returns the JSON text as a str,
    # which then has to be written to the file by hand.
    #     str1 = json.dumps(myList)
    #     print(type(str1))
    #     fp.write(str1)

    # json.dump() writes straight to the file object, so the intermediate
    # string is not needed. The ``with`` block closes the file even when
    # serialisation raises.
    with open('myj.json', 'w') as fp:
        json.dump(myList, fp)

    print('json write')

    

def jsonRead():
    """Load ``myj.json`` and print every record as ``<name> <age>``.

    Each record is formatted with ``'%(name)s %(age)d'``, so it must be a
    mapping with a ``name`` entry and a numeric ``age`` entry.
    """
    # Two-step alternative: read the whole text, then parse it with
    # json.loads().
    #     with open('myj.json', 'r') as fp:
    #         rd = fp.read()
    #     print(type(rd))
    #     rd = json.loads(rd)
    #     print(type(rd))

    # json.load() parses directly from the file object. The ``with`` block
    # closes the file even when the content is not valid JSON.
    with open('myj.json', 'r') as fp:
        rd = json.load(fp)

    for n in rd:
        print('%(name)s %(age)d' % n)

        

if __name__ == '__main__':
    # Round trip: write the sample file first, then read it back.
    for step in (jsonWrite, jsonRead):
        step()


#-------------------------------------------------------------------------------------------------------


# strJson = '''

# {    

#         "snapshot" : {

#         "repos" : "lg.com/repositories/snapshots",

#         "userid" : "lg",

#         "passwd" : "1234"

#     },

#     "release" : {

#         "repos" : "lg.com/release",

#         "userid" : "test",

#         "passwd" : "5678"

#     },

#     "component" : {

#         "test":"lg.com"

#     }

# }

# '''


# myD = json.loads(strJson)

# print(type(myD))

# print(myD['snapshot']['passwd'])



json write

aaa 20

bbb 30

ccc 40







Python Debugging



#Log를 남겨보자


 

import logging

import os

 

# Make sure the log directory exists. exist_ok=True replaces the separate
# isdir() check, so there is no gap between checking and creating.
os.makedirs('log', exist_ok=True)

# Append mode: new records are added to the existing log file.
logging.basicConfig(filename='./log/my.log',
                    filemode='a',
                    level=logging.DEBUG,
                    format='[%(asctime)s][%(levelname)s] %(message)s %(lineno)d')

logging.info("program start.......")
logging.critical("my critical")
logging.error('my err')
# logging.warn() was a deprecated alias and no longer exists in
# Python 3.13; logging.warning() is the supported spelling.
logging.warning('my warnning')
logging.log(logging.WARNING, 'start msg...')

try:
    # Deliberate ZeroDivisionError so that the except branch logs an error.
    a = 10/0
    print(a)
    # Arguments are passed separately so the message is only formatted
    # when the record is actually emitted.
    logging.info('adata=%d', a)
except Exception as err:
    logging.error(err)


 







Python excel 사용하기



pip install XlsxWriter : xlsx 파일 쓰기(write) 전용 third party library (기존 파일 읽기/수정은 지원하지 않으며, 읽기가 필요하면 아래의 openpyxl을 사용한다)


import xlsxwriter


# Create the workbook together with its two worksheets.
wb = xlsxwriter.Workbook('my.xlsx')
ws1 = wb.add_worksheet('mysheet1')
ws2 = wb.add_worksheet('mysheet2')

# (worksheet, cell, value) triples, written in the listed order.
# The last entry is a formula string summing A2:A3 on the second sheet.
for sheet, cell, value in (
    (ws1, 'A1', 10),
    (ws1, 'A2', 20),
    (ws2, 'A1', 'hello'),
    (ws2, 'A2', 100),
    (ws2, 'A3', 200),
    (ws2, 'A4', '=sum(A2:A3)'),
):
    sheet.write(cell, value)

# Finish the workbook before reporting success.
wb.close()
print('excel write...')



pip install openpyxl


# This snippet used openpyxl without importing it.
import openpyxl

# --- Write: create a workbook with two sheets and save it ---------------
wb = openpyxl.Workbook()

ws1 = wb.active
ws1.title = "mysheet1"
ws2 = wb.create_sheet('mysheet2')

ws1['A1'] = 'korea'
ws1['A2'] = 20
# append() adds the values as a new row below the rows already in use.
ws1.append([10, 20, 30])

ws2['A1'] = 'hello'
ws2['A2'] = 200

wb.save('otes.xlsx')

# --- Read back: reopen the file, modify it and save it again ------------
wb = openpyxl.load_workbook('otes.xlsx')

# Index the workbook by sheet name; get_sheet_by_name() is deprecated.
ws = wb['mysheet1']
ws['B1'] = 'python'

print(ws['A1'].value)
print(ws['A2'].value)

wb.save('otes.xlsx')


데이터 생성하여 excel 차트 그리기



import openpyxl


from openpyxl.chart import BarChart, LineChart, Series, Reference


# Build a workbook whose first sheet holds one header row and six data rows.
wb = openpyxl.Workbook()
ws1 = wb.active
ws2 = wb.create_sheet(title="mysheet2")

for row in (
    ['number', 'title', 'title2', 'title3'],
    [2, 10, 20, 30],
    [3, 40, 40, 20],
    [4, 50, 70, 60],
    [5, 60, 80, 80],
    [6, 70, 90, 90],
    [7, 80, 20, 20],
):
    ws1.append(row)

chart1 = BarChart()
chart1.style = 10
chart1.title = "Bar chart"
chart1.x_axis.title = "x test"
chart1.y_axis.title = "y test"

# The series come from columns 2-4 only; row 1 supplies their titles.
# Column 1 ('number') is used as the category axis below, so it is kept
# out of the data range -- otherwise it would also be plotted as a series.
data = Reference(ws1, min_col=2, min_row=1, max_row=7, max_col=4)
cat = Reference(ws1, min_col=1, min_row=2, max_row=7)
chart1.add_data(data, titles_from_data=True)
chart1.set_categories(cat)
chart1.shape = 3

# Anchor the chart at cell F1, next to the data.
ws1.add_chart(chart1, 'F1')

# A single save is enough: it writes both the data and the chart.
wb.save('mychart.xlsx')






Final Work - 웹로그 분석하기



하기와 같은 Apache Server의 접속 로그가 있다고 가정한다.



0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:51 +0900] "GET / HTTP/1.1" 200 11450

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:51 +0900] "GET /tomcat.png HTTP/1.1" 200 5103

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:51 +0900] "GET /tomcat.css HTTP/1.1" 200 5926

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /bg-nav.png HTTP/1.1" 200 1401

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /bg-middle.png HTTP/1.1" 200 1918

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /bg-button.png HTTP/1.1" 200 713

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /asf-logo.png HTTP/1.1" 200 17811

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /bg-upper.png HTTP/1.1" 200 3103

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:41:52 +0900] "GET /favicon.ico HTTP/1.1" 200 21630

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:42:07 +0900] "GET /host-manager/html HTTP/1.1" 401 2098

0:0:0:0:0:0:0:1 - admin [22/Jul/2016:11:42:12 +0900] "GET /host-manager/html HTTP/1.1" 200 7890

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:42:12 +0900] "GET /host-manager/images/tomcat.gif HTTP/1.1" 200 1934

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:42:12 +0900] "GET /host-manager/images/asf-logo.gif HTTP/1.1" 200 7279

0:0:0:0:0:0:0:1 - admin [22/Jul/2016:11:42:19 +0900] "GET /host-manager/html HTTP/1.1" 200 7758

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:42:22 +0900] "GET / HTTP/1.1" 200 11450

0:0:0:0:0:0:0:1 - admin [22/Jul/2016:11:42:24 +0900] "GET /host-manager/html HTTP/1.1" 200 7758

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:42:34 +0900] "GET / HTTP/1.1" 200 11450

0:0:0:0:0:0:0:1 - - [22/Jul/2016:11:45:35 +0900] "GET /h1.html HTTP/1.1" 200 44

192.168.123.236 - - [22/Jul/2016:11:48:14 +0900] "GET /h3.html HTTP/1.1" 200 44

192.168.123.236 - - [22/Jul/2016:11:48:14 +0900] "GET /favicon.ico HTTP/1.1" 200 21630

192.168.123.241 - - [22/Jul/2016:11:48:15 +0900] "GET /h1.html HTTP/1.1" 200 44

192.168.123.241 - - [22/Jul/2016:11:48:15 +0900] "GET /favicon.ico HTTP/1.1" 200 21630

192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET / HTTP/1.1" 200 11450

192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /tomcat.css HTTP/1.1" 200 5926

192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /tomcat.png HTTP/1.1" 200 5103

192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /bg-nav.png HTTP/1.1" 200 1401

192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /asf-logo.png HTTP/1.1" 200 17811

192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /bg-upper.png HTTP/1.1" 200 3103

192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /bg-middle.png HTTP/1.1" 200 1918

192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /bg-button.png HTTP/1.1" 200 713

192.168.123.240 - - [22/Jul/2016:11:48:16 +0900] "GET /favicon.ico HTTP/1.1" 200 21630

192.168.123.236 - - [22/Jul/2016:11:48:20 +0900] "GET /h2.html HTTP/1.1" 200 44

192.168.123.226 - - [22/Jul/2016:11:48:20 +0900] "GET /h1.html HTTP/1.1" 200 44

192.168.123.226 - - [22/Jul/2016:11:48:20 +0900] "GET /favicon.ico HTTP/1.1" 200 21630

192.168.123.240 - - [22/Jul/2016:11:48:21 +0900] "GET /h2.html HTTP/1.1" 200 44

192.168.123.241 - - [22/Jul/2016:11:48:21 +0900] "GET /h2.html HTTP/1.1" 200 44

192.168.123.235 - - [22/Jul/2016:11:48:23 +0900] "GET /h1.html HTTP/1.1" 200 44

192.168.123.235 - - [22/Jul/2016:11:48:23 +0900] "GET /favicon.ico HTTP/1.1" 200 21630




출력 방식은 다음과 같다.



메뉴 출력


웹 로그 분석


1. 로그분석

2. 통계보기

3. 엑셀저장

4. 종료



1번 메뉴 선택


================================

호스트 :

아이디:

유저네임:

시간:

요청정보:

상태코드:

바이트사이즈:

================================


2. 통계분석


총 바이트 전송 사이즈 :

페이지별 전송 사이즈 :

h1:

h2:

h3:

h4:


....

================================

id별 방문 횟수

admin : ?? 번?

================================


총 뷰 횟수(로그라인수) :

페이지별 방문 횟수

h1 : ??

h2 : ??




Answer



import re

class View:
    """One parsed line of a web server access log.

    After construction the instance exposes these attributes, all kept as
    the raw strings captured from the line:
    ``host``, ``identity``, ``username``, ``time``, ``request``,
    ``status`` and ``size``.
    """

    # The host is matched as any run of non-blank characters, so that IPv6
    # addresses such as 0:0:0:0:0:0:0:1 and host names are captured whole
    # (a digits-and-dots pattern only picks up the trailing "1" of an IPv6
    # address). Raw string: \S, \s and \[ are regex escapes, not Python ones.
    _LINE_PATTERN = re.compile(
        r'(\S+)\s(\S+)\s(\S+)\s\[(.+)\]\s"(.+)"\s(\S+)\s(\S+)')

    def __init__(self, inStr):
        """Parse ``inStr`` (one log line) into the instance attributes."""
        self.parseStr(inStr)

    def parseStr(self, inStr):
        """Split one log line into its seven fields.

        Raises:
            ValueError: if ``inStr`` does not look like an access log line.
        """
        match = self._LINE_PATTERN.search(inStr)
        if match is None:
            # Fail with a message naming the offending line instead of an
            # AttributeError on None.
            raise ValueError('unrecognised access log line: %r' % (inStr,))

        (self.host, self.identity, self.username, self.time,
         self.request, self.status, self.size) = match.groups()

class Statistics:
    """Aggregate statistics over the lines of an access log file.

    After construction these attributes are available:
        views     -- list of View objects, one per parsed log line
        statBytes -- summed size per host, plus a 'total' entry
        statIp    -- number of log lines per host
        statUser  -- number of log lines per user name other than '-'
        statPage  -- for each requested ``.html`` page, the number of
                     requests per host, plus a 'total' entry
    """

    # Captures the path of a GET request for an .html page.
    # Raw string: \s and \. are regex escapes, not Python ones.
    _HTML_REQUEST = re.compile(r'GET\s+(.+\.html)\s+HTTP.+')

    def __init__(self, fileName):
        """Read ``fileName`` and compute all statistics."""
        self.views = list()  # list of 'View'
        self.readFile(fileName)
        self.makeStatistics()

    def readFile(self, fileName):
        """Append one View per non-blank line of ``fileName`` to ``views``."""
        with open(fileName, 'r') as fp:
            for inLine in fp:
                # Blank lines carry no record, so skip them.
                if not inLine.strip():
                    continue
                self.views.append(View(inLine))

    def makeStatisticsBytes(self):
        """Sum the size field per host into ``statBytes``."""
        self.statBytes = dict()
        for aView in self.views:
            try:
                sizeByte = int(aView.size)
            except (TypeError, ValueError):
                # Size field is not a number (for example '-'): skip it.
                continue
            self.statBytes[aView.host] = (
                self.statBytes.get(aView.host, 0) + sizeByte)

        # The right-hand side is evaluated before 'total' is stored, so the
        # sum covers only the per-host entries.
        self.statBytes['total'] = sum(self.statBytes.values())

    def makeStatisticsIp(self):
        """Count log lines per host into ``statIp``."""
        self.statIp = dict()
        for aView in self.views:
            self.statIp[aView.host] = self.statIp.get(aView.host, 0) + 1

    def makeStatisticsUser(self):
        """Count log lines per user name into ``statUser``.

        Lines whose user name is '-' are ignored.
        """
        self.statUser = dict()
        for aView in self.views:
            if aView.username == '-':
                continue
            self.statUser[aView.username] = (
                self.statUser.get(aView.username, 0) + 1)

    def makeStatisticsPage(self):
        """Count requests for ``.html`` pages per page and host.

        Lines are counted regardless of their status code.
        """
        self.statPage = dict()
        for aView in self.views:
            match = self._HTML_REQUEST.search(aView.request)
            if not match:
                continue
            perHost = self.statPage.setdefault(match.group(1), dict())
            perHost[aView.host] = perHost.get(aView.host, 0) + 1

        # As above, each sum is computed before its 'total' entry is stored.
        for perHost in self.statPage.values():
            perHost['total'] = sum(perHost.values())

    def makeStatistics(self):
        """Compute every statistic from the current ``views``."""
        self.makeStatisticsBytes()
        self.makeStatisticsIp()
        self.makeStatisticsUser()
        self.makeStatisticsPage()

    def printResult(self):
        """Print the parsed lines followed by every statistic."""
        print('='*80)
        print('Parsing result')
        for aView in self.views:
            print(aView.host, aView.identity, aView.username, aView.time,
                  aView.request, aView.status, aView.size)

        print('-'*80)
        print('Statistics: Bytes')
        for anItem in self.statBytes:
            print('%20s: %d' % (anItem, self.statBytes[anItem]))

        print('-'*80)
        print('Statistics: IP')
        for anItem in self.statIp:
            print('%20s: %d' % (anItem, self.statIp[anItem]))

        print('-'*80)
        print('Statistics: User')
        for anItem in self.statUser:
            print('%20s: %d' % (anItem, self.statUser[anItem]))

        print('-'*80)
        print('Statistics: Page')
        for aPage in self.statPage:
            print(aPage)
            for anItem in self.statPage[aPage]:
                print('%20s: %d' % (anItem, self.statPage[aPage][anItem]))

        print('='*80)

if __name__ == '__main__':
    # Parse the access log and print every report to stdout.
    Statistics('localhost_access_log.2016-02-26.txt').printResult()



반응형

'Data > Python' 카테고리의 다른 글

Machine Learning #1 python  (0) 2018.09.18
파이썬 선(禪)(Zen of Python)  (0) 2016.09.22
Python 강좌(4)  (0) 2016.07.20
Python 강좌(3)  (0) 2016.07.20
Python 강좌(2)  (0) 2016.07.19