"""Helpers for inspecting crawler result rows and a driver get/release log.

Created on 2019-11-14

@author: User
"""
import codecs
import json
import re

import cx_Oracle

# NOTE: ``json`` and ``re`` are only referenced by the disabled analysis that
# is kept as a comment inside ``val``; the imports are retained so that code
# can be re-enabled without touching the import block.

# Literal fragments that mark a driver being acquired / released in a log line.
_GET_DRIVER_MARKER = 'debug("get driver")'
_RELEASE_DRIVER_MARKER = 'debug("release driver")'


def val(out_path="errorLink.txt"):
    """Dump the links of type-1 crawler results and print per-type totals.

    Queries ``bxkc.BXKC_CRAWLER_RESULT`` (rows created on/after
    2019-12-12 21:00:00 with ``type=1``, at most 400 rows), writes the
    ``CRAWLER_LINK`` of every type-1 row to ``out_path`` (one per line, UTF-8)
    and prints how many rows were seen for each type.

    Args:
        out_path: File the links are written to. Defaults to the original
            hard-coded ``"errorLink.txt"``.
    """
    sql = " select type,CRAWLER_RESULT,CRAWLER_LINK from bxkc.BXKC_CRAWLER_RESULT where to_date(create_time,'yyyy-mm-dd hh24:mi:ss')>=to_date('2019-12-12 21:00:00','yyyy-mm-dd hh24:mi:ss') and type=1 and rownum<=400 "

    # type -> [row count, {info text -> count}]
    type_stats = dict()

    # Connect to the database.
    # NOTE(review): credentials are hard-coded in the connect string.
    conn = cx_Oracle.connect('bxkc/bxkc@192.168.2.54:1521/orcl')
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            rows = cursor.fetchall()

            # Rows are processed while the connection is still open so that
            # any lazily-read column value stays readable if the disabled
            # analysis below is re-enabled.
            with codecs.open(out_path, "w", encoding="utf8") as f:
                for row in rows:
                    row_type = row[0]
                    link = row[2]

                    if row_type not in type_stats:
                        type_stats[row_type] = [1, dict()]
                    else:
                        type_stats[row_type][0] += 1

                    # NOTE: the per-row error-message breakdown below is
                    # disabled (in the original it was a bare string literal
                    # in the loop body). While it stays disabled the per-type
                    # info dict is never filled, so the inner reporting loop
                    # at the end of this function prints nothing.
                    # NOTE(review): ``json.loads(..., encoding=...)`` is
                    # rejected by Python 3.9+; drop the keyword if re-enabling.
                    #
                    # raw_result = str(row[1])
                    # json_result = json.loads(raw_result, encoding="utf8")
                    # err_msg = json_result["err_msg"]
                    # set_info = set()
                    # for _info in err_msg.split("#"):
                    #     if _info != "":
                    #         set_info.add("err_msg" + _info)
                    # if "errorInfo" in json_result:
                    #     if type(json_result["errorInfo"]) == str:
                    #         if re.search("{", json_result["errorInfo"]) is not None:
                    #             errorInfo = json.loads(json_result["errorInfo"], encoding="utf8")
                    #         else:
                    #             errorInfo = {"123": json_result["errorInfo"]}
                    #     else:
                    #         errorInfo = json_result["errorInfo"]
                    #     for _error in errorInfo.keys():
                    #         _info = errorInfo[_error]
                    #         set_info.add("errorInfo" + _info)
                    # for _info in set_info:
                    #     if not _info in type_stats[row_type][1].keys():
                    #         type_stats[row_type][1][_info] = 1
                    #     else:
                    #         type_stats[row_type][1][_info] += 1

                    # The disabled variant of this filter additionally required
                    # "err_msg翻页链接不匹配" (pagination link mismatch) to be
                    # in set_info.
                    # A NULL link comes back as None; skip it instead of
                    # failing on None + "\n".
                    if row_type == 1 and link is not None:
                        f.write(link + "\n")
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if the query or the file
        # writing fails.
        conn.close()

    for type_key, (total, info_counts) in type_stats.items():
        print("type:", type_key, " sum:", total)
        for info, count in info_counts.items():
            print("-->", "count:", count, "\tpercent:", round(count / total, 3), "info:", info)


def _count_driver_events(lines):
    """Count get/release driver markers per key over an iterable of log lines.

    The key of a line is its last two whitespace-separated tokens joined by a
    single space (presumably the thread identifier - verify against the log
    format). Blank lines are ignored.

    Returns:
        dict mapping key -> ``[get_count, release_count]``.
    """
    counts = dict()
    for raw_line in lines:
        line = raw_line.strip()
        if not line:
            # Skip blank lines rather than treating the first one as EOF.
            continue
        key = " ".join(line.split()[-2:])
        per_key = counts.setdefault(key, [0, 0])
        if _GET_DRIVER_MARKER in line:
            per_key[0] += 1
        if _RELEASE_DRIVER_MARKER in line:
            per_key[1] += 1
    return counts


def analyzeLog(log_path="test.log", encoding="gbk"):
    """Print every log key whose get-driver and release-driver counts differ.

    Args:
        log_path: Log file to scan. Defaults to the original ``"test.log"``.
        encoding: Text encoding of the log. Defaults to the original ``"gbk"``.
    """
    with codecs.open(log_path, "r", encoding=encoding) as f:
        counts = _count_driver_events(f)

    for key, (acquired, released) in counts.items():
        if acquired != released:
            print(key)


if __name__ == "__main__":
    val()
    # analyzeLog()