# featureEngine.py

import re
import module.htmlDrawing as hd
import numpy as np
from keras.preprocessing.sequence import pad_sequences
import time
from bs4 import BeautifulSoup
from module.Utils import *
import math
import json
from collections import OrderedDict
import os

scripts = '''
function statisticIframe(nodes){
    var counts_communicateTags = 0;
    for(var i=0;i<nodes.length;i++){
        child = nodes[i]
        if (child.tagName!=null){
            if (child.tagName.toLowerCase() in {a:"",input:"",select:""} || child.onclick!=null){
                counts_communicateTags += 1;
            }
            if(child.tagName.toLowerCase()=="iframe"){
                if(child.contentWindow.document!=null){
                    counts_communicateTags += statisticIframe(child.contentWindow.document.all);
                }
            }
        }
    }
    return counts_communicateTags;
}

function statistic(node,deepth){
    if(node.childNodes==null){
        node.counts_communicateTags = 0;
        return node.counts_communicateTags;
    }
    node.counts_communicateTags = 0;
    for(var i=0;i<node.childNodes.length;i++){
        child = node.childNodes[i];
        //remove non-content tags (left disabled)
        /*
        if (child.tagName!=null){
            if (child.tagName.toLowerCase() in {head:"",script:"",meta:"",link:"",style:""} || child.nodeType==8 ){
                node.removeChild(child);
                continue;
            }
        }
        */
        if (child.tagName!=null){
            if (child.tagName.toLowerCase() in {a:"",input:"",select:""} || child.onclick!=null){
                node.counts_communicateTags += 1;
            }
        }
        /*if(child.tagName!=null && child.tagName.toLowerCase()=="iframe" && child.contentWindow.document!=null){
            node.counts_communicateTags += statisticIframe(child.contentWindow.document.all);
        }else{
            node.counts_communicateTags += statistic(child,deepth+1);
        }*/
        node.counts_communicateTags += statistic(child,deepth+1);
    }
    var innertext = node.innerText;
    if(innertext){
        var text = innertext.replace(/\s/g,'');
        //var text = innertext;
        node.counts_text = text.length;
        var punc = text.match(/;|,|。|:|、/g);
        var lines = innertext.match(/.{10}\\n/g);
        if(lines){
            node.counts_lines = lines.length;
        }else{
            node.counts_lines = 0;
        }
        if(punc){
            node['counts_punctuations']= punc.length;
        }else{
            node.counts_punctuations = 0;
        }
    }else{
        node.counts_lines = 0;
        node.counts_text = 0;
        node.counts_punctuations = 0;
    }
    node.deepth = deepth;
    return node.counts_communicateTags;
}

function label(node,targethtml){
    var innerhtml = node.innerHTML;
    if(innerhtml){
        innerhtml = innerhtml.replace(/\s/g,'');
        sub_innerhtml = innerhtml.substring(0,40);
        if (sub_innerhtml==targethtml.substring(0,40)){
            return 1;
        }else{
            return 0;
        }
    }else{
        return 0;
    }
}

function getListFontSize(node,_array){
    if(node!=null && node.nodeType==1){
        _fontSize = parseInt(window.getComputedStyle(node).fontSize.match(/\d+/)[0]);
        if(_fontSize!=null){
            _array.push(_fontSize);
        }
        if(node.childNodes!=null){
            for(var i=0;i<node.childNodes.length;i++){
                var child = node.childNodes[i];
                getListFontSize(child,_array);
            }
        }
    }
}

function stastic_time(node,_array){
    var pattern_time = /\d{4}[\-\/::年.]\d{1,2}[\-\/::月.]\d{1,2}/g
    var _find_flag = false;
    if (node.childNodes==null){
    }else{
        for(var i=0;i<node.childNodes.length;i++){
            var childNode = node.childNodes[i];
            var _innerText = childNode.innerText;
            if (_innerText!=null && _innerText.search(pattern_time)>=0){
                stastic_time(childNode,_array);
                _find_flag = true;
            }
        }
    }
    if (!_find_flag && node!=document && node.tagName.toLowerCase()!='script'){
        _array_fontSize = new Array();
        getListFontSize(node,_array_fontSize);
        //use apply so Math.min works on the array of font sizes (Math.min(array) would give NaN)
        _array.push([getOffsetLeft(node),getOffsetTop(node),getListXpath(node,new Array()),Math.min.apply(null,_array_fontSize)]);
    }
    return _array;
}

function search(){
    statistic(document,1);
    var objs = document.all;
    var data = new Array();
    for(var i=0;i<objs.length;i++){
        obj = objs[i];
        if (obj.offsetWidth>100 && obj.offsetHeight>100 && obj.parentNode.tagName!=null && obj.childNodes.length>0){
            maxArea = 0;
            child_maxArea = null;
            secondmaxArea = 0;
            child_secondmaxArea = null;
            for(var j=0;j<obj.childNodes.length;j++){
                if(obj.childNodes[j].offsetWidth!=null && obj.childNodes[j].offsetHeight!=null){
                    if( obj.childNodes[j].offsetWidth*obj.childNodes[j].offsetHeight>maxArea){
                        maxArea = obj.childNodes[j].offsetWidth*obj.childNodes[j].offsetHeight;
                        child_maxArea = obj.childNodes[j];
                    }
                    if(obj.childNodes[j].offsetWidth*obj.childNodes[j].offsetHeight>secondmaxArea && obj.childNodes[j].offsetWidth*obj.childNodes[j].offsetHeight<maxArea){
                        secondmaxArea = obj.childNodes[j].offsetWidth*obj.childNodes[j].offsetHeight;
                        child_secondmaxArea = obj.childNodes[j];
                    }
                }
            }
            _item = new Array();
            _item.push(getOffsetLeft(obj),getOffsetTop(obj),obj.offsetWidth,obj.offsetHeight,obj.deepth,obj.counts_communicateTags,obj.counts_lines,obj.counts_text,obj.counts_punctuations,
                       getOffsetLeft(obj.parentNode),getOffsetTop(obj.parentNode),obj.parentNode.offsetWidth,obj.parentNode.offsetHeight,obj.parentNode.deepth,obj.parentNode.counts_communicateTags,obj.parentNode.counts_lines,obj.parentNode.counts_text,obj.parentNode.counts_punctuations)
            if(child_maxArea!=null){
                _item.push(getOffsetLeft(child_maxArea),getOffsetTop(child_maxArea),child_maxArea.offsetWidth,child_maxArea.offsetHeight,child_maxArea.deepth,child_maxArea.counts_communicateTags,child_maxArea.counts_lines,child_maxArea.counts_text,child_maxArea.counts_punctuations)
            }else{
                _item.push(-1,-1,-1,-1,-1,-1,-1,-1,-1)
            }
            if(child_secondmaxArea!=null){
                _item.push(getOffsetLeft(child_secondmaxArea),getOffsetTop(child_secondmaxArea),child_secondmaxArea.offsetWidth,child_secondmaxArea.offsetHeight,child_secondmaxArea.deepth,child_secondmaxArea.counts_communicateTags,child_secondmaxArea.counts_lines,child_secondmaxArea.counts_text,child_secondmaxArea.counts_punctuations)
            }else{
                _item.push(-1,-1,-1,-1,-1,-1,-1,-1,-1)
            }
            data.push([_item,obj.innerHTML,getListXpath(obj,new Array(),true)])
        }
    }
    var data_time = stastic_time(document,new Array());
    return([data,data_time]);
}
return (search());
'''
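# The injected script (together with the helpers expected from scripts_common, e.g. getOffsetLeft,
# getOffsetTop and getListXpath) returns [data, data_time]:
#   data      - one entry per candidate block: [36 layout/text features covering the element, its
#               parent, its largest child and its second-largest child, innerHTML, xpath]
#   data_time - candidate publish-time nodes as [offsetLeft, offsetTop, xpath, min font size]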

def statisticCommunicateTags(element):
    count = 0
    soup = BeautifulSoup(element.get_attribute("innerHTML"),"lxml")
    childs = soup.find_all(recursive=True)
    for child in childs:
        if child.name in ["a","input","select"] or "onclick" in child.attrs:
            #print(child.text)
            count += 1
    return count

def statisticPunctuationAndWords(element,punctuationWords_pattern=re.compile("[;,。:、]")):
    text = element.text
    text = re.sub("\r|\n|\s","",text)
    words_len = len(text)
    punctuation_len = len(re.findall(punctuationWords_pattern,text))
    return punctuation_len,words_len
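# These two helpers mirror the counters computed inside the injected JS (interactive tags,
# punctuation marks and text length) for the Selenium/BeautifulSoup based code path.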

def encodeInput(url,target_source):
    def _method(args):
        try:
            url = args["url"]
            target_source = args["target_source"]
            browser = args["browser"]
            start_time = time.time()
            browser.get(url)
            print("get",time.time()-start_time)
            start_time = time.time()
            #browser.refresh()
            #time.sleep(1)
            browser.maximize_window()
            #elements = browser.find_elements_by_xpath("//*")
            findTags = ["div","table","tbody","tr","td","form","li","span"]
            MIN_WIDTH = 400
            MIN_HEIGHT = 400
            list_input = []
            input_x = []
            label_y = []
            for tag in findTags:
                for element in browser.find_elements_by_tag_name(tag):
                    rect = element.rect
                    x = rect["x"]
                    y = rect["y"]
                    width = rect["width"]
                    height = rect["height"]
                    if width>=MIN_WIDTH and height>=MIN_HEIGHT:
                        list_input.append(element)
            print("search",time.time()-start_time)
            start_time = time.time()
            for element in list_input:
                communicateTags = statisticCommunicateTags(element)
                punctuation,words = statisticPunctuationAndWords(element)
                input_x.append([element.rect["x"],element.rect["y"],element.rect["width"],element.rect["height"],communicateTags,punctuation,words])
                label_y.append(labelElement(element, target_source))
            print("encode",time.time()-start_time)
            the_max = np.max(input_x,axis=0)
            the_max = np.array(list(the_max)[2:4]+list(the_max)[2:])
            input_x = np.array(input_x/the_max)
            if len(label_y)>0 and np.max(label_y)==1:
                return input_x,np.array(label_y)
            else:
                return None
        except Exception as e:
            print(e)
            return None
    args = {"url":url,"target_source":target_source}
    # assuming hd.executeMethod returns _method's result, pass it back to the caller
    return hd.executeMethod(_method, args)

def getInput(url):
    def _method(args):
        try:
            url = args["url"]
            browser = args["browser"]
            start_time = time.time()
            browser.get(url)
            print("get",time.time()-start_time)
            start_time = time.time()
            #browser.refresh()
            #time.sleep(1)
            browser.maximize_window()
            #elements = browser.find_elements_by_xpath("//*")
            findTags = ["div","table","tbody","tr","td","form","li","span"]
            MIN_WIDTH = 400
            MIN_HEIGHT = 400
            list_input = []
            input_x = []
            label_y = []
            for tag in findTags:
                for element in browser.find_elements_by_tag_name(tag):
                    rect = element.rect
                    x = rect["x"]
                    y = rect["y"]
                    width = rect["width"]
                    height = rect["height"]
                    if width>=MIN_WIDTH and height>=MIN_HEIGHT:
                        list_input.append(element)
            print("search",time.time()-start_time)
            start_time = time.time()
            for element in list_input:
                communicateTags = statisticCommunicateTags(element)
                punctuation,words = statisticPunctuationAndWords(element)
                input_x.append([element.rect["x"],element.rect["y"],element.rect["width"],element.rect["height"],communicateTags,punctuation,words])
            print("encode",time.time()-start_time)
            the_max = np.max(input_x,axis=0)
            the_max = np.array(list(the_max)[2:4]+list(the_max)[2:])
            input_x = np.array(input_x/the_max)
            return [np.expand_dims(input_x,0)]
        except Exception as e:
            print(e)
            return None
    args = {"url":url}
    # assuming hd.executeMethod returns _method's result, pass it back to the caller
    return hd.executeMethod(_method, args)
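# getInput is the unlabeled counterpart of encodeInput (same element scan, no labelElement call),
# intended for prediction time; both expect hd.executeMethod to inject the Selenium "browser"
# into args before _method runs.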

def encodeInput_byJS(url,targethtml):
    def label(innerhtml,target_source):
        target_source = re.sub("[\r\n\s]","",str(target_source))
        pattern = ">(.*)<"
        target_source = re.findall(re.compile(pattern), target_source)[0]
        innerhtml = re.sub("[\r\n\s]","",str(innerhtml))
        #print(target_source[0:40])
        #print(element_source[0:40])
        #if target_source[0:10]==element_source[0:10] and target_source[-10:]==element_source[-10]:
        if target_source[0:60]==innerhtml[0:60]:
            return 1
        return 0
    def _method(args):
        try:
            url = args["url"]
            targethtml = args["targethtml"]
            browser = args["browser"]
            start = time.time()
            browser.get(url)
            _log = CLog()
            _log.write("get"+str(time.time()-start))
            browser.maximize_window()
            start = time.time()
            # data = browser.execute_script(scripts_common+scripts)
            # the script returns [data, data_time]; unpack both, as in getInput_byJS
            data,data_time = get_js_rs(browser, scripts_common+scripts)
            input_x,list_inner,list_xpath = dealWithScriptOut(data)
            list_label = []
            for item in list_inner:
                list_label.append(label(item, targethtml))
            print("cost",time.time()-start)
            if len(list_label)>0 and np.max(list_label)==1:
                return input_x,np.array(list_label)
            else:
                return None
        except Exception as e:
            print(e)
        finally:
            pass
        return None
    args = {"url":url,"targethtml":targethtml}
    # assuming hd.executeMethod returns _method's result (getAllData relies on this)
    return hd.executeMethod(_method, args)

def getInput_byJS(browser, url):
    def label(innerhtml,target_source):
        target_source = re.sub("[\r\n\s]","",str(target_source))
        pattern = ">(.*)<"
        target_source = re.findall(re.compile(pattern), target_source)[0]
        innerhtml = re.sub("[\r\n\s]","",str(innerhtml))
        #print(target_source[0:40])
        #print(element_source[0:40])
        #if target_source[0:10]==element_source[0:10] and target_source[-10:]==element_source[-10]:
        if target_source[0:60]==innerhtml[0:60]:
            return 1
        return 0
    try:
        # browser = hd.getdriver()
        # debug("get driver")
        # hd.loadPage(browser, url)
        # browser.maximize_window()
        # data,data_time = browser.execute_script(scripts_common+scripts)
        data,data_time = get_js_rs(browser, scripts_common+scripts)
        log('content/time extraction script finished')
        input_x,list_inner,list_xpath = dealWithScriptOut(data)
        if input_x is not None:
            #return [np.expand_dims(np.transpose(pad_sequences(np.transpose(input_x,(1,0)), 155,padding="post", truncating="post", value=0,dtype="float32"),(1,0)),0)],list_inner
            return True,[[np.expand_dims(input_x,0)],list_inner,list_xpath,data_time]
        else:
            return False,""
    except Exception as e:
        error(str(e))
        err_msg = ""
        if re.search("frame",str(e)) is not None:
            err_msg = "#iframe#"
        return None,err_msg
    # finally:
    #     hd.adddriver(browser)
    #     debug("release driver")
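# Minimal usage sketch (assumes a Selenium WebDriver with the page already loaded, e.g. obtained
# via the hd.getdriver() call that is commented out above):
#   flag, result = getInput_byJS(browser, url)
#   if flag:
#       [x], list_inner, list_xpath, data_time = result
#       # x has shape (1, n_blocks, 36); list_inner and list_xpath are aligned with its rows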

def dealWithScriptOut(data,key_index=4):
    list_input = []
    list_inner = []
    list_xpath = []
    for index in range(len(data)):
        #clean nan
        for i in range(len(data[index][0])):
            if data[index][0][i] is None or math.isnan(data[index][0][i]):
                data[index][0][i] = -1
    #sort by area (width*height), largest block first
    data.sort(key=lambda x:x[0][2]*x[0][3],reverse=True)
    for item in data:
        list_input.append(item[0])
        list_inner.append(item[1])
        list_xpath.append(item[2])
    #print(len(data))
    if len(list_input)>0:
        the_max = np.max(list_input,axis=0)
        the_max = np.array([x if x>0 else 1 for x in the_max])
        the_max = np.array((list(the_max)[2:4]+list(the_max)[2:9])*4)
        input_x = np.array(list_input/the_max)
        return input_x,list_inner,list_xpath
    else:
        return None,None,None
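# Normalization note: the_max is built from the width/height maxima (reused for the x/y offsets)
# plus the remaining seven column maxima, repeated four times to match the 4 x 9 feature layout
# produced by search() (element, parent, largest child, second-largest child).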

def getResponseHeaders(browser):
    har = json.loads(browser.get_log('har')[0]['message'])
    print(har['log']['entries'])
    return OrderedDict(sorted([(header["name"], header["value"]) for header in har['log']['entries'][0]['General']], key = lambda x: x[0]))

def getHttpStatus(browser):
    for responseReceived in browser.get_log('performance'):
        try:
            response = json.loads(responseReceived[u'message'])[u'message'][u'params'][u'response']
            if response[u'url'] == browser.current_url:
                return (response[u'status'], response[u'statusText'])
        except:
            pass
    return None

def getHttpResponseHeader(browser):
    for responseReceived in browser.get_log('performance'):
        try:
            response = json.loads(responseReceived[u'message'])[u'message'][u'params'][u'response']
            if response[u'url'] == browser.current_url:
                print(response)
                return response[u'headers']
        except:
            pass
    return None

def labelElement(element,target_source):
    target_source = re.sub("[\r\n\s]","",str(target_source))
    pattern = ">(.*)<"
    target_source = re.findall(re.compile(pattern), target_source)[0]
    element_source = element.get_attribute("innerHTML")
    element_source = re.sub("[\r\n\s]","",str(element_source))
    #print(target_source[0:40])
    #print(element_source[0:40])
    #if target_source[0:10]==element_source[0:10] and target_source[-10:]==element_source[-10]:
    if target_source[0:60]==element_source[0:60]:
        return 1
    return 0
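# Labeling rule: an element is a positive sample when the first 60 characters of its
# whitespace-stripped innerHTML match the inner content of target_source (extracted via ">(.*)<").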

def padding(all_data,pad=True):
    max_len = np.max([len(data[1]) for data in all_data])
    print("max_len",max_len)
    #max_len = 200
    list_x = []
    list_y = []
    list_url = []
    for data in all_data:
        input_x = data[0]
        label_y = data[1]
        url = data[2]
        if pad:
            input_x = np.transpose(pad_sequences(np.transpose(input_x,(1,0)), max_len,padding="post", truncating="post", value=0,dtype="float32"),(1,0))
            list_x.append(input_x)
            label_y = pad_sequences([label_y],max_len,padding="post", truncating="post", value=-1)[0]
            #list_y.append(label_y)
            list_y.append([(np.arange(2)==i).astype(np.integer) for i in label_y])
        else:
            #input_x = np.array(input_x)
            list_x.append([input_x])
            list_y.append([(np.arange(2)==i).astype(np.integer) for i in label_y])
        list_url.append(url)
    return [np.array(list_x),np.array(list_y),list_url]
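# With pad=True the result is roughly: list_x of shape (n_pages, max_len, n_features) and list_y
# holding a length-2 one-hot vector per block; padded positions are labeled -1, which one-hot
# encodes to [0, 0].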

def getAllData():
    all_data = load("Link_Content.pk")
    data = []
    temp_file = "temp_data.pk"
    count = 0
    label = 0
    data_len = len(all_data)
    for row in all_data:
        count += 1
        print(str(label)+"/"+str(count)+"/"+str(data_len),row[0])
        #encode = encodeInput(row[0], row[1])
        if count%100==0:
            save(data,temp_file)
        encode = encodeInput_byJS(row[0], row[1])
        if encode:
            label += 1
            x,y = encode
            data.append([x,y,row[0]])
        else:
            print("None")
    data = padding(data)
    return data

def augmentation(data,times=100):
    aug_data = []
    for item in data:
        x,y = item[0],item[1]
        new_item = []
        for i_x,i_y in zip(list(x),list(y)):
            new_item.append([i_x,i_y])
        aug_data.append(item)
        for _ in range(times):
            new_x = []
            new_y = []
            np.random.shuffle(new_item)
            for new_i in new_item:
                new_x.append(new_i[0])
                new_y.append(new_i[1])
            aug_data.append([new_x,new_y])
    return aug_data
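# augmentation keeps each original sample and adds `times` variants in which the block order is
# randomly shuffled; features and labels are shuffled together, so their alignment is preserved.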

def dumpLinkContent():
    def trytosave(d):
        try:
            save(d,"1.pk")
            return 1
        except Exception as e:
            return 0
    import cx_Oracle as cx_Oracle
    conn = cx_Oracle.connect('bxkc/bxkc@192.168.2.54:1521/orcl')  # connect to the database
    cursor = conn.cursor()
    sql = " select page_link,page_content from detail_content "
    cursor.execute(sql)
    rows = cursor.fetchall()
    data = []
    for row in rows:
        if trytosave(row)==1:
            data.append(row)
    save(data,"Link_Content.pk")

def relabel(file_data="sourceData_36Input_28849_sort.pk"):
    '''
    @summary: adjust the labels to fix the roll-up problem (move a label from a shallow box
              down to a deeper box it contains)
    '''
    data = load(file_data)
    count = 0
    set_1 = set()
    set_2 = set()
    for page in data:
        _feature = page[0]
        _label = page[1]
        _url = page[2]
        _label_index = np.argmax(_label)
        _label_left = _feature[_label_index][0]
        _label_top = _feature[_label_index][1]
        _label_width = _feature[_label_index][2]
        _label_height = _feature[_label_index][3]
        _label_deepth = _feature[_label_index][4]
        _label_text = _feature[_label_index][7]
        _index = 0
        _re_deepth = 0
        _re_index = -1
        for _box in _feature:
            _left = _box[0]
            _top = _box[1]
            _width = _box[2]
            _height = _box[3]
            _deepth = _box[4]
            _text = _box[7]
            if _deepth>_label_deepth:
                if _left>=_label_left and _top>=_label_top and (_left+_width)<=(_label_left+_label_width) and (_top+_height)<=(_label_top+_label_height) and (_width*_height/(_label_width*_label_height)>0.7 or (_width*_height/(_label_width*_label_height)>0.5 and _text/_label_text>0.9)):
                    set_1.add(_url)
                    if _deepth>_re_deepth:
                        _re_deepth = _deepth
                        _re_index = _index
            _index += 1
        if _re_index>-1:
            _label[_label_index] = 0
            _label[_re_index] = 1
            print(_url)
            print(_label_index,_re_index)
    data.sort(key=lambda x:x[2])
    print(len(list(set_1)))
    save(data,"sourceData_36Input_"+str(len(data[1]))+"_relabel.pk")
    data = padding(data)
    save(data,"data_"+str(len(data[1]))+"_relabel.pk")
    return data

if __name__=="__main__":
    #dumpLinkContent()
    '''
    relabel()
    '''
    # getInput_byJS expects a Selenium WebDriver with the page already loaded; hd.getdriver()
    # is assumed to provide one (see the commented-out call inside getInput_byJS)
    browser = hd.getdriver()
    _url = "http://hailing.taizhou.gov.cn/art/2019/5/23/art_50810_2498758.html"
    browser.get(_url)
    _flag,data = getInput_byJS(browser, _url)
    if _flag:
        for item in data[3]:
            print(item)