test4.py

#coding:UTF8
'''
Created on 2019-01-04
@author: User
'''
from bs4 import BeautifulSoup, Comment
import copy
import re
import sys
import os
import codecs
import requests
import time
import logging
import json
import random

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')

def test(name, content, _url=None):
    # Post one document to the content_extract service and return the raw JSON reply.
    user = {
        "content": content,
        "doc_id": name,
        "timeout": 200,
        "original_docchannel": 101
    }
    myheaders = {'Content-Type': 'application/json',
                 "Authorization": "NzZmOWZlMmU2MGY3YmQ4MDBjM2E5MDAyZjhjNjQ0MzZlMmE0NTMwZg=="}
    # Local worker pool; the commented-out lines below picked one endpoint at
    # random (see the failover sketch after this function).
    list_url = ["http://127.0.0.1:15030/content_extract",
                "http://127.0.0.1:15031/content_extract",
                "http://127.0.0.1:15032/content_extract",
                "http://127.0.0.1:15033/content_extract",
                "http://127.0.0.1:15034/content_extract",
                "http://127.0.0.1:15035/content_extract",
                "http://127.0.0.1:15036/content_extract",
                "http://127.0.0.1:15037/content_extract",
                ]
    # _i = random.randint(0, len(list_url) - 1)
    # _resp = requests.post(list_url[_i], json=user, headers=myheaders, verify=True)
    # _url = "http://1255640119316927.cn-hangzhou.pai-eas.aliyuncs.com/api/predict/content_extract"
    # _url = "http://127.0.0.1:15030/content_extract"
    if _url is None:
        _url = "http://192.168.2.102:15030/content_extract"
    _resp = requests.post(_url, json=user, headers=myheaders, verify=True)
    # _resp = requests.post("http://192.168.2.102:15000" + '/article_extract', json=user, headers=myheaders, verify=True)
    resp_json = _resp.content.decode("utf-8")
    logging.info("%d===%s" % (_resp.status_code, resp_json[:10]))
    return resp_json
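
# A minimal sketch of the load balancing hinted at by list_url above: try the
# endpoints in random order and fall through on connection errors. This is an
# illustration, not part of the original script; post_with_failover is a
# hypothetical helper name.
def post_with_failover(user, headers, urls):
    for _url in random.sample(urls, len(urls)):
        try:
            return requests.post(_url, json=user, headers=headers, verify=True)
        except requests.exceptions.RequestException:
            continue  # endpoint unreachable, try the next one
    raise RuntimeError("all content_extract endpoints failed")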

def presure_test():
    # Fire 3000 identical requests at the service through a small worker pool.
    from BiddingKG.dl.common.multiThread import MultiThreadHandler
    from queue import Queue
    text = codecs.open("C:\\Users\\Administrator\\Desktop\\2.html", "r", encoding="utf8").read()
    content = str(BeautifulSoup(text, "html.parser").find("div", id="pcontent"))
    start_time = time.time()
    task_queue = Queue()
    for i in range(3000):
        task_queue.put(text)

    def _handle(item, result_queue):
        test("", item)

    mt = MultiThreadHandler(task_queue, _handle, None, 1)
    mt.run()
    end_time = time.time()
    print("all takes :%ds" % (end_time - start_time))
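
# If BiddingKG's MultiThreadHandler is unavailable, the same load test can be
# sketched with the standard library alone (assumption: test() is thread-safe;
# presure_test_stdlib is an illustrative name, not from the original script).
def presure_test_stdlib(text, n=3000, workers=1):
    from concurrent.futures import ThreadPoolExecutor
    start_time = time.time()
    with ThreadPoolExecutor(max_workers=workers) as pool:
        # Consume the iterator so every submitted request actually runs.
        list(pool.map(lambda item: test("", item), [text] * n))
    print("all takes :%ds" % (time.time() - start_time))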

def runlocal(content):
    # Run the extractor in-process instead of over HTTP.
    sys.path.append(os.path.abspath("../.."))
    import fool
    from BiddingKG.dl.interface.extract import predict
    predict("12", content, "打印机", original_docchannel=101)

def run_one():
    # Extract a single local HTML file in-process and print the result.
    from BiddingKG.dl.interface.extract import predict
    # filename = "比地_52_79929693.html"
    # text = codecs.open("C:\\Users\\User\\Desktop\\数据20191014\\" + filename, "r", encoding="utf8").read()
    text = codecs.open("C:\\Users\\Administrator\\Desktop\\2.html", "r", encoding="utf8").read()
    content = str(BeautifulSoup(text, "html.parser").find("div", id="pcontent"))
    # text = '''
    # 购安装工程二标段,第一中标候选人,投标人名称,南阳市宝琛装饰工程有限责任公司,投标报价:147892
    # '''
    print("start")
    a = time.time()
    print(predict("12", text, "打印机", original_docchannel=101))
    # test(12, content)
    # test(12, text)
    print("takes", time.time() - a)
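
# The service replies with a JSON string; a caller could decode it with the
# standard library (assumption: the endpoint returns a JSON object; the field
# names are not verified here):
#   result = json.loads(test("12", content))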

if __name__ == "__main__":
    # presure_test()
    run_one()