|
@@ -0,0 +1,386 @@
|
|
|
+
|
|
|
+
|
|
|
+--新增规则
|
|
|
+根据公告附件进行去重
|
|
|
+
|
|
|
+--1 中标公告 - 同[标题 、项目编号、项目名称] - 同中标人 - 同中标价(!=0) - 信息源>1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),1 from run_dumplicate_document_his
|
|
|
+where docchannel in (101,119,120)
|
|
|
+group by docchannel,doctitle_refine,win_tenderer,win_bid_price
|
|
|
+having doctitle_refine!="" and doctitle_refine is not NULL
|
|
|
+and win_tenderer!="" and win_bid_price!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 2. 中标公告 - 同项目编号- 同[项目名称、标题] - 同中标人 - 同中标价(!=0) - 同信息源=1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,1,tenderee),2 from run_dumplicate_document_his
|
|
|
+where docchannel in (101,119,120)
|
|
|
+group by docchannel,project_code,project_name,win_tenderer,win_bid_price
|
|
|
+having project_code!="" and project_code is not NULL
|
|
|
+and project_name!="" and project_name is not NULL
|
|
|
+and win_tenderer!="" and win_bid_price!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+--中标公告 编号 标题 中标人 中标价 站源=1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,0,tenderee),3 from run_dumplicate_document_his
|
|
|
+where docchannel in (101,119,120)
|
|
|
+group by docchannel,project_code,doctitle_refine,win_tenderer,win_bid_price
|
|
|
+having project_code!="" and project_code is not NULL
|
|
|
+and doctitle_refine!="" and doctitle_refine is not NULL
|
|
|
+and win_tenderer!="" and win_bid_price=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+--招标 编号 标题 招标人 预算 站源=1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,1,tenderee),4 from run_dumplicate_document_his
|
|
|
+where docchannel='52'
|
|
|
+group by project_code,doctitle_refine,tenderee,bidding_budget
|
|
|
+having project_code!="" and project_code is not NULL
|
|
|
+and doctitle_refine!="" and doctitle_refine is not NULL
|
|
|
+and tenderee!="" and tenderee is not NULL
|
|
|
+and bidding_budget!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 同一个招标人同一天采购同一样物品的时候,这个规则就不适用了
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),5 from run_dumplicate_document_his
|
|
|
+-- where docchannel='52'
|
|
|
+-- group by project_name,tenderee,bidding_budget
|
|
|
+-- having project_name!="" and project_name is not NULL
|
|
|
+-- and tenderee!="" and tenderee is not NULL
|
|
|
+-- and bidding_budget!="";
|
|
|
+
|
|
|
+--招标公告 编号 名称 预算 站源>1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),5 from run_dumplicate_document_his
|
|
|
+where docchannel not in (101,118,119,120)
|
|
|
+group by docchannel,project_code,project_name,bidding_budget
|
|
|
+having project_name!="" and project_name is not NULL
|
|
|
+and project_code!="" and project_code is not NULL
|
|
|
+and bidding_budget!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+
|
|
|
+-- 4. 招标公告 - 同[标题 、项目编号、项目名称]- 同[招标人、代理公司] - 同预算 - 信息源>1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),6 from run_dumplicate_document_his
|
|
|
+where docchannel in (52,118)
|
|
|
+group by docchannel,project_name,agency,bidding_budget
|
|
|
+having project_name!="" and project_name is not NULL
|
|
|
+and agency!="" and agency is not NULL
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 4. 招标公告 - 同[标题 、项目编号、项目名称]- 同[招标人、代理公司] - 同预算 - 信息源>1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),7 from run_dumplicate_document_his
|
|
|
+where docchannel in (52,118)
|
|
|
+group by docchannel,project_code,agency,bidding_budget
|
|
|
+having project_code!="" and project_code is not NULL
|
|
|
+and agency!="" and agency is not NULL
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 7. 非中标公告 - 同项目名称 - 同发布日期 - 同招标人 - 同预算 - 同类型 - 信息源>1 - 同项目编号
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),8 from run_dumplicate_document_his
|
|
|
+where docchannel not in (101,119,120)
|
|
|
+group by docchannel,project_name,page_time_stamp,tenderee,bidding_budget,project_code
|
|
|
+having project_name!="" and project_name is not NULL
|
|
|
+and page_time_stamp>0 and tenderee!="" and tenderee is not NULL
|
|
|
+and bidding_budget!="" and project_code!="" and project_code is not NULL
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 3. 中标公告 - 同项目编号- 同[项目名称、标题] - 同中标人 - 同中标价(==0)
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,0,tenderee),9 from run_dumplicate_document_his
|
|
|
+where docchannel in (101,119,120)
|
|
|
+group by docchannel,project_code,project_name,win_tenderer,win_bid_price
|
|
|
+having project_code!="" and project_code is not NULL
|
|
|
+and project_name!="" and project_name is not NULL
|
|
|
+and win_tenderer!="" and win_bid_price=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 8. 中标公告 - 同项目名称 - 同发布日期 - 同中标人 - 同中标价 - 同类型 - 信息源>1 - 同项目编号
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),10 from run_dumplicate_document_his
|
|
|
+where docchannel in (101,119,120)
|
|
|
+group by docchannel,project_name,page_time_stamp,win_tenderer,win_bid_price,project_code
|
|
|
+having project_name!="" and project_name is not NULL
|
|
|
+and page_time_stamp>0 and win_tenderer!=""
|
|
|
+and win_bid_price!="" and project_code!="" and project_code is not NULL
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- -- 6. 不同公告类型 - 同原标题- 同日期
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid(docid,page_time_stamp,extract_count,docchannel,2,tenderee),11 from run_dumplicate_document_his
|
|
|
+-- group by doctitle,page_time_stamp
|
|
|
+-- having doctitle!="" and doctitle is not NULL
|
|
|
+-- and page_time_stamp>0
|
|
|
+-- and count(1)>1;
|
|
|
+
|
|
|
+-- 4. 招标公告 - 同[标题 、项目编号、项目名称]- 同[招标人、代理公司] - 同预算 - 信息源>1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),12 from run_dumplicate_document_his
|
|
|
+where docchannel in (52,118)
|
|
|
+group by docchannel,doctitle_refine,tenderee,bidding_budget
|
|
|
+having doctitle_refine!="" and doctitle_refine is not NULL
|
|
|
+and tenderee!="" and tenderee is not NULL
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 3. 中标公告 - 同项目编号- 同[项目名称、标题] - 同中标人 - 同中标价(==0)
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,1,tenderee),13 from run_dumplicate_document_his
|
|
|
+where docchannel='52'
|
|
|
+group by project_code,doctitle_refine,agency,bidding_budget
|
|
|
+having project_code!="" and project_code is not NULL
|
|
|
+and doctitle_refine!="" and doctitle_refine is not NULL
|
|
|
+and agency!="" and agency is not NULL
|
|
|
+and bidding_budget!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 公告内容完全相同的去重
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,0,1),0 from run_dumplicate_document_his
|
|
|
+group by fingerprint
|
|
|
+having length(fingerprint)>0
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- -- 同公告类型-同一天-[业主单位、预算、中标人、中标价、代理机构]中两个相同且不为空其余只有一个值
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid_limitNum_contain(docid,page_time_stamp,extract_count,win_tenderer,tenderee,agency,1,doctitle_refine),35
|
|
|
+-- from run_dumplicate_document_his
|
|
|
+-- group by docchannel,page_time,win_bid_price,bidding_budget
|
|
|
+-- having length(win_bid_price)>0
|
|
|
+-- and length(bidding_budget)>0
|
|
|
+-- and count(1)>1
|
|
|
+-- ;
|
|
|
+
|
|
|
+-- -- 同公告类型-同一天-[业主单位、预算、中标人、中标价、代理机构]中两个相同且不为空其余只有一个值
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid_limitNum_contain(docid,page_time_stamp,extract_count,win_tenderer,win_bid_price,tenderee,1,doctitle_refine),36
|
|
|
+-- from run_dumplicate_document_his
|
|
|
+-- group by docchannel,page_time,agency,bidding_budget
|
|
|
+-- having length(agency)>0
|
|
|
+-- and length(bidding_budget)>0
|
|
|
+-- and count(1)>1
|
|
|
+-- ;
|
|
|
+
|
|
|
+-- -- 同公告类型-同一天-[业主单位、预算、中标人、中标价、代理机构]中两个相同且不为空其余只有一个值
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid_limitNum_contain(docid,page_time_stamp,extract_count,tenderee,bidding_budget,agency,1,doctitle_refine),37
|
|
|
+-- from run_dumplicate_document_his
|
|
|
+-- group by docchannel,page_time,win_tenderer,win_bid_price
|
|
|
+-- having length(win_tenderer)>0
|
|
|
+-- and length(win_bid_price)>0
|
|
|
+-- and count(1)>1
|
|
|
+-- ;
|
|
|
+
|
|
|
+-- -- 同公告类型-同一天-[业主单位、预算、中标人、中标价、代理机构]中两个相同且不为空其余只有一个值
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid_limitNum_contain(docid,page_time_stamp,extract_count,tenderee,win_bid_price,bidding_budget,1,doctitle_refine),38
|
|
|
+-- from run_dumplicate_document_his
|
|
|
+-- group by docchannel,page_time,win_tenderer,agency
|
|
|
+-- having length(win_tenderer)>0
|
|
|
+-- and length(agency)>0
|
|
|
+-- and count(1)>1
|
|
|
+-- ;
|
|
|
+
|
|
|
+-- -- 同公告类型-同一天-[业主单位、预算、中标人、中标价、代理机构]中两个相同且不为空其余只有一个值
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid_limitNum_contain(docid,page_time_stamp,extract_count,win_tenderer,tenderee,bidding_budget,1,doctitle_refine),39
|
|
|
+-- from run_dumplicate_document_his
|
|
|
+-- group by docchannel,page_time,win_bid_price,agency
|
|
|
+-- having length(win_bid_price)>0
|
|
|
+-- and length(agency)>0
|
|
|
+-- and count(1)>1
|
|
|
+-- ;
|
|
|
+
|
|
|
+
|
|
|
+-- 4. 招标公告 - 同[标题 、项目编号、项目名称]- 同[招标人、代理公司] - 同预算 - 信息源>1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),14 from run_dumplicate_document_his
|
|
|
+where docchannel in (52,118)
|
|
|
+group by docchannel,project_code,tenderee,bidding_budget
|
|
|
+having project_code!="" and project_code is not NULL
|
|
|
+and tenderee!="" and tenderee is not NULL
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 2. 中标公告 - 同项目编号- 同[项目名称、标题] - 同中标人 - 同中标价(!=0) - 同信息源=1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,1,tenderee),15 from run_dumplicate_document_his
|
|
|
+where docchannel in (101,119,120)
|
|
|
+group by docchannel,project_code,doctitle_refine,win_tenderer,win_bid_price
|
|
|
+having project_code!="" and project_code is not NULL
|
|
|
+and doctitle_refine!="" and doctitle_refine is not NULL
|
|
|
+and win_tenderer!="" and win_bid_price!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+--1 中标公告 - 同[标题 、项目编号、项目名称] - 同中标人 - 同中标价(!=0) - 信息源>1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),16 from run_dumplicate_document_his
|
|
|
+where docchannel in (101,119,120)
|
|
|
+group by docchannel,project_code,win_tenderer,win_bid_price
|
|
|
+having project_code!="" and project_code is not NULL
|
|
|
+and win_tenderer!="" and win_bid_price!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+--1 中标公告 - 同[标题 、项目编号、项目名称] - 同中标人 - 同中标价(!=0) - 信息源>1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),17 from run_dumplicate_document_his
|
|
|
+where docchannel in (101,119,120)
|
|
|
+group by docchannel,project_name,win_tenderer,win_bid_price
|
|
|
+having project_name!="" and project_name is not NULL
|
|
|
+and win_tenderer!="" and win_bid_price!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 4. 招标公告 - 同[标题 、项目编号、项目名称]- 同[招标人、代理公司] - 同预算 - 信息源>1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),18 from run_dumplicate_document_his
|
|
|
+where docchannel in (52,118)
|
|
|
+group by docchannel,doctitle_refine,agency,bidding_budget
|
|
|
+having doctitle_refine!="" and doctitle_refine is not NULL
|
|
|
+and agency!="" and agency is not NULL
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 5. 招标公告 - 同项目编号- 同[项目名称、标题] - 同[招标人、代理公司] - 同预算(!=0) - 同信息源=1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,1,tenderee),19 from run_dumplicate_document_his
|
|
|
+where docchannel='52'
|
|
|
+group by project_code,project_name,agency,bidding_budget
|
|
|
+having project_code!="" and project_code is not NULL
|
|
|
+and project_name!="" and project_name is not NULL
|
|
|
+and agency!="" and agency is not NULL
|
|
|
+and bidding_budget!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 5. 招标公告 - 同项目编号- 同[项目名称、标题] - 同[招标人、代理公司] - 同预算(!=0) - 同信息源=1
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,1,tenderee),20 from run_dumplicate_document_his
|
|
|
+where docchannel='52'
|
|
|
+group by project_code,project_name,tenderee,bidding_budget
|
|
|
+having project_code!="" and project_code is not NULL
|
|
|
+and project_name!="" and project_name is not NULL
|
|
|
+and tenderee!="" and tenderee is not NULL
|
|
|
+and bidding_budget!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 9.同['公告变更','拍卖出让','土地矿产','招标答疑']- 同[标题 、项目编号、项目名称]- 同[招标人、代理公司] - 同预算 - 同一天 - 不同数据源
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),21 from run_dumplicate_document_his
|
|
|
+group by docchannel,doctitle_refine,tenderee,bidding_budget,page_time_stamp
|
|
|
+having docchannel in (51,103,115,116)
|
|
|
+and doctitle_refine!=""
|
|
|
+and tenderee!="" and tenderee is not NULL
|
|
|
+and bidding_budget!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 9.同['公告变更','拍卖出让','土地矿产','招标答疑']- 同[标题 、项目编号、项目名称]- 同[招标人、代理公司] - 同预算 - 同一天 - 不同数据源
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),22 from run_dumplicate_document_his
|
|
|
+group by docchannel,project_code,tenderee,bidding_budget,page_time_stamp
|
|
|
+having docchannel in (51,103,115,116)
|
|
|
+and project_code!="" and project_code is not NULL
|
|
|
+and tenderee!="" and tenderee is not NULL
|
|
|
+and bidding_budget!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 9.同['公告变更','拍卖出让','土地矿产','招标答疑']- 同[标题 、项目编号、项目名称]- 同[招标人、代理公司] - 同预算 - 同一天 - 不同数据源
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),23 from run_dumplicate_document_his
|
|
|
+group by docchannel,project_name,tenderee,bidding_budget,page_time_stamp
|
|
|
+having docchannel in (51,103,115,116)
|
|
|
+and project_name!="" and project_name is not NULL
|
|
|
+and tenderee!="" and tenderee is not NULL
|
|
|
+and bidding_budget!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 9.同['公告变更','拍卖出让','土地矿产','招标答疑']- 同[标题 、项目编号、项目名称]- 同[招标人、代理公司] - 同预算 - 同一天 - 不同数据源
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),24 from run_dumplicate_document_his
|
|
|
+group by docchannel,doctitle_refine,agency,bidding_budget,page_time_stamp
|
|
|
+having docchannel in (51,103,115,116)
|
|
|
+and doctitle_refine!=""
|
|
|
+and agency!="" and agency is not NULL
|
|
|
+and bidding_budget!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 9.同['公告变更','拍卖出让','土地矿产','招标答疑']- 同[标题 、项目编号、项目名称]- 同[招标人、代理公司] - 同预算 - 同一天 - 不同数据源
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),25 from run_dumplicate_document_his
|
|
|
+group by docchannel,project_code,agency,bidding_budget,page_time_stamp
|
|
|
+having docchannel in (51,103,115,116)
|
|
|
+and project_code!="" and project_code is not NULL
|
|
|
+and agency!="" and agency is not NULL
|
|
|
+and bidding_budget!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- 9.同['公告变更','拍卖出让','土地矿产','招标答疑']- 同[标题 、项目编号、项目名称]- 同[招标人、代理公司] - 同预算 - 同一天 - 不同数据源
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select f_set_docid(docid,page_time_stamp,extract_count,web_source_no,2,tenderee),26 from run_dumplicate_document_his
|
|
|
+group by docchannel,project_name,agency,bidding_budget,page_time_stamp
|
|
|
+having docchannel in (51,103,115,116)
|
|
|
+and project_name!="" and project_name is not NULL
|
|
|
+and agency!="" and agency is not NULL
|
|
|
+and bidding_budget!=""
|
|
|
+and count(1)>1;
|
|
|
+
|
|
|
+-- -- 同公告类型-同一天-[业主单位、预算、中标人、中标价、代理机构]中两个相同且不为空其余只有一个值
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid_limitNum_contain(docid,page_time_stamp,extract_count,win_tenderer,win_bid_price,agency,1,doctitle_refine),30
|
|
|
+-- from run_dumplicate_document_his
|
|
|
+-- group by docchannel,page_time,tenderee,bidding_budget
|
|
|
+-- having length(tenderee)>0
|
|
|
+-- and length(bidding_budget)>0
|
|
|
+-- and count(1)>1
|
|
|
+-- ;
|
|
|
+
|
|
|
+-- -- 同公告类型-同一天-[业主单位、预算、中标人、中标价、代理机构]中两个相同且不为空其余只有一个值
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid_limitNum_contain(docid,page_time_stamp,extract_count,bidding_budget,win_bid_price,agency,1,doctitle_refine),31
|
|
|
+-- from run_dumplicate_document_his
|
|
|
+-- group by docchannel,page_time,tenderee,win_tenderer
|
|
|
+-- having length(tenderee)>0
|
|
|
+-- and length(win_tenderer)>0
|
|
|
+-- and count(1)>1
|
|
|
+-- ;
|
|
|
+
|
|
|
+-- -- 同公告类型-同一天-[业主单位、预算、中标人、中标价、代理机构]中两个相同且不为空其余只有一个值
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid_limitNum_contain(docid,page_time_stamp,extract_count,win_tenderer,bidding_budget,agency,1,doctitle_refine),32
|
|
|
+-- from run_dumplicate_document_his
|
|
|
+-- group by docchannel,page_time,tenderee,win_bid_price
|
|
|
+-- having length(tenderee)>0
|
|
|
+-- and length(win_bid_price)>0
|
|
|
+-- and count(1)>1
|
|
|
+-- ;
|
|
|
+
|
|
|
+
|
|
|
+-- -- 同公告类型-同一天-[业主单位、预算、中标人、中标价、代理机构]中两个相同且不为空其余只有一个值
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid_limitNum_contain(docid,page_time_stamp,extract_count,win_tenderer,win_bid_price,bidding_budget,1,doctitle_refine),33
|
|
|
+-- from run_dumplicate_document_his
|
|
|
+-- group by docchannel,page_time,tenderee,agency
|
|
|
+-- having length(tenderee)>0
|
|
|
+-- and length(agency)>0
|
|
|
+-- and count(1)>1
|
|
|
+-- ;
|
|
|
+
|
|
|
+
|
|
|
+-- -- 同公告类型-同一天-[业主单位、预算、中标人、中标价、代理机构]中两个相同且不为空其余只有一个值
|
|
|
+-- insert into document_group_his(json_set_docid,rule_id)
|
|
|
+-- select f_set_docid_limitNum_contain(docid,page_time_stamp,extract_count,tenderee,win_bid_price,agency,1,doctitle_refine),34
|
|
|
+-- from run_dumplicate_document_his
|
|
|
+-- group by docchannel,page_time,win_tenderer,bidding_budget
|
|
|
+-- having length(win_tenderer)>0
|
|
|
+-- and length(bidding_budget)>0
|
|
|
+-- and count(1)>1
|
|
|
+-- ;
|
|
|
+
|
|
|
+--标题和类型相同的公告分为 编号 预算 中标人 中标价 代理都为空 及其它 两组 对这两组的数据进行匹配 规则是招标人相同且站源不同
|
|
|
+insert into document_group_his(json_set_docid,rule_id)
|
|
|
+select F_SET_DOCID_BINARYCHART(docid,page_time_stamp,extract_count,project_code,project_name,tenderee,bidding_budget,win_tenderer,win_bid_price,agency,web_source_no),0
|
|
|
+from run_dumplicate_document_his
|
|
|
+where 1=1
|
|
|
+group by doctitle_refine,docchannel
|
|
|
+having length(doctitle_refine)>7 and count(1)>1;
|