|
@@ -496,17 +496,21 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
|
|
def getAttachPath(self,filemd5,_dochtmlcon):
|
|
def getAttachPath(self,filemd5,_dochtmlcon):
|
|
_soup = BeautifulSoup(_dochtmlcon,"lxml")
|
|
_soup = BeautifulSoup(_dochtmlcon,"lxml")
|
|
|
|
|
|
- _find = _soup.find("a",attrs={"data":filemd5})
|
|
|
|
- filelink = ""
|
|
|
|
- if _find is None:
|
|
|
|
- _find = _soup.find("img",attrs={"data":filemd5})
|
|
|
|
- if _find is not None:
|
|
|
|
- filelink = _find.attrs.get("src","")
|
|
|
|
- else:
|
|
|
|
- filelink = _find.attrs.get("href","")
|
|
|
|
- _path = filelink.split("/file")
|
|
|
|
- if len(_path)>1:
|
|
|
|
- return _path[1]
|
|
|
|
|
|
+ list_mark = ["data","filelink"]
|
|
|
|
+ for _mark in list_mark:
|
|
|
|
+ _find = _soup.find("a",attrs={_mark:filemd5})
|
|
|
|
+ filelink = ""
|
|
|
|
+ if _find is None:
|
|
|
|
+ _find = _soup.find("img",attrs={_mark:filemd5})
|
|
|
|
+ if _find is not None:
|
|
|
|
+ filelink = _find.attrs.get("src","")
|
|
|
|
+ else:
|
|
|
|
+ filelink = _find.attrs.get("href","")
|
|
|
|
+ if filelink.find("bidizhaobiao")>=0:
|
|
|
|
+ _path = filelink.split("/file")
|
|
|
|
+ if len(_path)>1:
|
|
|
|
+ return _path[1]
|
|
|
|
+
|
|
|
|
|
|
def getAttach_json_fromRedis(self,filemd5):
|
|
def getAttach_json_fromRedis(self,filemd5):
|
|
db = self.redis_pool.getConnector()
|
|
db = self.redis_pool.getConnector()
|
|
@@ -590,15 +594,16 @@ class Dataflow_ActivteMQ_attachment(Dataflow_attachment):
|
|
log("getAttachments find in ots:%s"%(_filemd5))
|
|
log("getAttachments find in ots:%s"%(_filemd5))
|
|
list_attachment.append(Attachment_postgres(_attach_ots.getProperties()))
|
|
list_attachment.append(Attachment_postgres(_attach_ots.getProperties()))
|
|
else:
|
|
else:
|
|
- if _path[0]=="/":
|
|
|
|
- _path = _path[1:]
|
|
|
|
- _filetype = _path.split(".")[-1]
|
|
|
|
- _attach = {attachment_filemd5:_filemd5,
|
|
|
|
- attachment_filetype:_filetype,
|
|
|
|
- attachment_status:20,
|
|
|
|
- attachment_path:"%s/%s"%(_filemd5[:4],_path),
|
|
|
|
- attachment_crtime:getCurrent_date(format="%Y-%m-%d %H:%M:%S")}
|
|
|
|
- list_attachment.append(Attachment_postgres(_attach))
|
|
|
|
|
|
+ if _path:
|
|
|
|
+ if _path[0]=="/":
|
|
|
|
+ _path = _path[1:]
|
|
|
|
+ _filetype = _path.split(".")[-1]
|
|
|
|
+ _attach = {attachment_filemd5:_filemd5,
|
|
|
|
+ attachment_filetype:_filetype,
|
|
|
|
+ attachment_status:20,
|
|
|
|
+ attachment_path:"%s/%s"%(_filemd5[:4],_path),
|
|
|
|
+ attachment_crtime:getCurrent_date(format="%Y-%m-%d %H:%M:%S")}
|
|
|
|
+ list_attachment.append(Attachment_postgres(_attach))
|
|
|
|
|
|
|
|
|
|
|
|
|