# getArticles.py
#coding:utf8
import json

import pandas as pd
import requests
  5. id = []
  6. l_content = []
  7. l_tenderee = []
  8. l_agency = []
  9. l_win_tenderer = []
  10. l_first_tenderer = []
  11. l_second_tenderer = []
  12. l_third_tenderer = []
  13. for i in range(1):
  14. print(i)
  15. #type=tenderee,agency,tenderer,firsttenderer,secondtenderer,thirdtenderer
  16. page = requests.post("http://192.168.2.54:8081/data-bigdata-api-1.0.0/data/outPutData?num=3000&type=zhaobiao&param=tenderee,agency").text
  17. #page = requests.post("http://192.168.2.3:9090/data/outPutData?num=2&type=zhongbiao&param=tenderer").text
  18. #print(page)
  19. for item in json.loads(page,encoding="utf8"):
  20. id.append(item["document_id"])
  21. l_content.append(item["content"])
  22. l_tenderee.append(item["tenderee"])
  23. l_agency.append(item["agency"])
  24. l_win_tenderer.append("")
  25. l_first_tenderer.append("")
  26. l_second_tenderer.append("")
  27. l_third_tenderer.append("")
  28. '''
  29. for i in range(1):
  30. print(i)
  31. #type=tenderee,agency,tenderer,firsttenderer,secondtenderer,thirdtenderer
  32. page = requests.post("http://192.168.2.54:8081/data-bigdata-api-1.0.0/data/outPutData?num=1500&type=zhongbiao&param=tenderee,agency,tenderer,firsttenderer,secondtenderer,thirdtenderer").text
  33. #page = requests.post("http://192.168.2.3:9090/data/outPutData?num=2&type=zhongbiao&param=tenderer").text
  34. #print(page)
  35. for item in json.loads(page,encoding="utf8"):
  36. id.append(item["document_id"])
  37. l_content.append(item["content"])
  38. l_tenderee.append(item["tenderee"])
  39. l_agency.append(item["agency"])
  40. l_win_tenderer.append(item["win_tenderer"])
  41. l_first_tenderer.append(item["first_tenderer"])
  42. l_second_tenderer.append(item["second_tenderer"])
  43. l_third_tenderer.append(item["third_tenderer"])
  44. '''
  45. '''
  46. for i in range(1):
  47. print(i)
  48. #type=tenderee,agency,tenderer,firsttenderer,secondtenderer,thirdtenderer
  49. page = requests.post("http://192.168.2.54:8081/data-bigdata-api-1.0.0/data/outPutData?num=1000&not=true&type=zhongbiao&param=tenderer").text
  50. #page = requests.post("http://192.168.2.3:9090/data/outPutData?num=2&type=zhongbiao&param=tenderer").text
  51. #print(page)
  52. for item in json.loads(page,encoding="utf8"):
  53. id.append(item["document_id"])
  54. l_content.append(item["content"])
  55. l_tenderee.append("")
  56. l_agency.append("")
  57. l_win_tenderer.append("")
  58. l_first_tenderer.append("")
  59. l_second_tenderer.append("")
  60. l_third_tenderer.append("")
  61. '''
  62. dataframe = pd.DataFrame({'id':id,'content':l_content,"tenderee":l_tenderee,"agency":l_agency,"win_tenderer":l_win_tenderer,"first_tenderer":l_first_tenderer,"second_tenderer":l_second_tenderer,"third_tenderer":l_third_tenderer})
  63. columns = ['id','content',"tenderee","agency","win_tenderer","first_tenderer","second_tenderer","third_tenderer"]
  64. dataframe.to_csv("articles.csv",index=False,header=False,sep=",",encoding="utf8",columns=columns)