# 1.py (1.0 KB)
  1. '''
  2. Created on 2019年8月8日
  3. @author: User
  4. '''
  5. import re
  6. if __name__=="__main__":
  7. main_url = "www.baidu.com/343/acb/36.psp"
  8. next_page_url = "www.baidu.com/343/ab/1.html"
  9. print(re.split('(\d+)',main_url))
  10. main_href = set(re.split('[/|&|?]', main_url))
  11. tmp_href = set(re.split('[/|&|?]', next_page_url))
  12. tmp_href_ele1 = list(tmp_href - main_href)
  13. tmp_href_ele2 = list(main_href - tmp_href)
  14. print(tmp_href_ele1)
  15. print(tmp_href_ele2)
  16. tmp_href_digit_list1 = re.findall(r'\d+', tmp_href_ele1[0])
  17. tmp_href_digit_list2 = re.findall(r'\d+', tmp_href_ele2[0])
  18. tmp_href_digit_list = list(set(tmp_href_digit_list1) - set(tmp_href_digit_list2))
  19. begin = next_page_url.find(tmp_href_ele1[0])
  20. end = begin + len(tmp_href_ele1[0])
  21. first_end = next_page_url.find(tmp_href_digit_list[0], begin, end)
  22. second_begin = first_end + len(tmp_href_digit_list[0])
  23. first_part = next_page_url[:first_end]
  24. second_part = next_page_url[second_begin:]
  25. print(first_part,second_part)