|
@@ -754,7 +754,8 @@ class Dataflow_ActivteMQ_extract(Dataflow_extract):
|
|
|
|
|
|
html_len = len(_dochtmlcon)
|
|
|
if html_len>200000:
|
|
|
-
|
|
|
+ if int(item.get("docid"))==238431011:
|
|
|
+ save(item,"238431011.pk")
|
|
|
try:
|
|
|
_soup = BeautifulSoup(_dochtmlcon,"lxml")
|
|
|
_soup = article_limit(_soup,200000)
|