12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788 |
"""
Created on 2019-11-14

@author: User
"""
- import cx_Oracle as cx_Oracle
- import json
- import re
- import codecs
def val():
    """Export links of recent type-1 crawler results and print per-type totals.

    Runs a fixed query against ``bxkc.BXKC_CRAWLER_RESULT`` (rows with
    ``type=1`` created on or after 2019-12-12 21:00:00, at most 400 rows),
    writes the ``CRAWLER_LINK`` of every type-1 row to ``errorLink.txt``
    (UTF-8, one link per line, file overwritten on each run) and prints the
    number of rows seen for each type.

    Returns:
        None. Output goes to ``errorLink.txt`` and to standard output.
    """
    sql = " select type,CRAWLER_RESULT,CRAWLER_LINK from bxkc.BXKC_CRAWLER_RESULT where to_date(create_time,'yyyy-mm-dd hh24:mi:ss')>=to_date('2019-12-12 21:00:00','yyyy-mm-dd hh24:mi:ss') and type=1 and rownum<=400 "

    # NOTE(review): credentials and host are hard-coded; consider moving them
    # to configuration or environment variables.
    conn = cx_Oracle.connect('bxkc/bxkc@192.168.2.54:1521/orcl')  # connect to the database
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            rows = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if the query fails.
        conn.close()

    # Maps type -> [row count, {message: count}].  The per-message breakdown
    # is not populated at present, so only the totals are reported below.
    stats = {}
    with codecs.open("errorLink.txt", "w", encoding="utf8") as f:
        for row in rows:
            row_type = row[0]
            link = row[2]

            entry = stats.setdefault(row_type, [0, {}])
            entry[0] += 1

            # A NULL CRAWLER_LINK comes back as None; skip it instead of
            # failing on the string concatenation.
            if row_type == 1 and link is not None:
                f.write(link + "\n")

    for row_type, (total, breakdown) in stats.items():
        print("type:", row_type, " sum:", total)
        for info, count in breakdown.items():
            print("-->", "count:", count, "\tpercent:", round(count / total, 3), "info:", info)
-
def analyzeLog(log_path="test.log", encoding="gbk"):
    """Print the log keys whose "get driver" and "release driver" counts differ.

    Each non-blank line is keyed by its last two whitespace-separated tokens.
    For every key the function counts the lines containing the literal text
    ``debug("get driver")`` and those containing ``debug("release driver")``,
    then prints each key whose two counts are not equal, in order of first
    appearance.

    Args:
        log_path: Path of the log file to read. Defaults to ``"test.log"``.
        encoding: Text encoding of the log file. Defaults to ``"gbk"``.

    Returns:
        None. Mismatched keys are printed to standard output.
    """
    get_marker = 'debug("get driver")'
    release_marker = 'debug("release driver")'

    # key -> [number of "get driver" lines, number of "release driver" lines]
    counts = {}
    with codecs.open(log_path, "r", encoding=encoding) as f:
        for raw_line in f:
            line = raw_line.strip()
            if not line:
                # A blank line is not end-of-file; keep scanning the rest.
                continue

            key = " ".join(line.split()[-2:])
            tally = counts.setdefault(key, [0, 0])
            if get_marker in line:
                tally[0] += 1
            if release_marker in line:
                tally[1] += 1

    for key, (acquired, released) in counts.items():
        if acquired != released:
            print(key)
-
# Script entry point: runs the database export by default.  Swap the call
# below to analyse the driver log instead.
if __name__ == "__main__":
    val()
    # analyzeLog()
-
-
|