val.py 3.2 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
'''
Created on 2019-11-14
@author: User
'''
  5. import cx_Oracle as cx_Oracle
  6. import json
  7. import re
  8. import codecs
  9. def val():
  10. conn=cx_Oracle.connect('bxkc/bxkc@192.168.2.54:1521/orcl') #连接数据库
  11. cursor=conn.cursor()
  12. sql = " select type,CRAWLER_RESULT,CRAWLER_LINK from bxkc.BXKC_CRAWLER_RESULT where to_date(create_time,'yyyy-mm-dd hh24:mi:ss')>=to_date('2019-12-12 21:00:00','yyyy-mm-dd hh24:mi:ss') and type=1 and rownum<=400 "
  13. cursor.execute(sql)
  14. rows = cursor.fetchall()
  15. _dict = dict()
  16. with codecs.open("errorLink.txt","w",encoding="utf8") as f:
  17. for row in rows:
  18. _type = row[0]
  19. _result = str(row[1])
  20. _link = row[2]
  21. if not _type in _dict:
  22. _dict[_type] = [1,dict()]
  23. else:
  24. _dict[_type][0] += 1
  25. '''
  26. json_result = json.loads(_result,encoding="utf8")
  27. err_msg = json_result["err_msg"]
  28. set_info = set()
  29. for _info in err_msg.split("#"):
  30. if _info!="":
  31. set_info.add("err_msg"+_info)
  32. if "errorInfo" in json_result:
  33. if type(json_result["errorInfo"])==str:
  34. if re.search("{",json_result["errorInfo"]) is not None:
  35. errorInfo = json.loads(json_result["errorInfo"],encoding="utf8")
  36. else:
  37. errorInfo = {"123":json_result["errorInfo"]}
  38. else:
  39. errorInfo = json_result["errorInfo"]
  40. for _error in errorInfo.keys():
  41. _info = errorInfo[_error]
  42. set_info.add("errorInfo"+_info)
  43. for _info in set_info:
  44. if not _info in _dict[_type][1].keys():
  45. _dict[_type][1][_info] = 1
  46. else:
  47. _dict[_type][1][_info] += 1
  48. '''
  49. if _type==1: #and "err_msg翻页链接不匹配" in set_info:
  50. f.write(_link+"\n")
  51. for _key in _dict.keys():
  52. _sum = _dict[_key][0]
  53. print("type:",_key," sum:",_sum)
  54. for _key1 in _dict[_key][1].keys():
  55. _count = _dict[_key][1][_key1]
  56. print("-->","count:",_count,"\tpercent:",round(_count/_sum,3),"info:",_key1)
  57. def analyzeLog():
  58. with codecs.open("test.log","r",encoding="gbk") as f:
  59. dict_thread_get_release = dict()
  60. while(True):
  61. line = f.readline().strip()
  62. if not line:
  63. break
  64. _key = " ".join(line.split()[-2:])
  65. if _key not in dict_thread_get_release:
  66. dict_thread_get_release[_key] = [0,0]
  67. if re.search("debug\(\"get driver\"\)",line) is not None:
  68. dict_thread_get_release[_key][0] += 1
  69. if re.search("debug\(\"release driver\"\)",line) is not None:
  70. dict_thread_get_release[_key][1] += 1
  71. for _key in dict_thread_get_release.keys():
  72. if dict_thread_get_release[_key][0]!=dict_thread_get_release[_key][1]:
  73. print(_key)
  74. if __name__=="__main__":
  75. val()
  76. #analyzeLog()