12345678910111213141516171819202122232425262728 |
"""
Created on 2019-08-08

@author: User

Locate the page number inside a "next page" URL by comparing it with the
main URL, and split the next-page URL into the text before and after that
number.
"""
import re

# Separator characters between URL segments. ``|`` stays in the class because
# the original pattern ``[/|&|?]`` also treated it as a literal separator.
_SEGMENT_SEP = re.compile(r'[/|&?]')
# A maximal run of non-separator characters, i.e. one non-empty URL segment.
_SEGMENT = re.compile(r'[^/|&?]+')
_DIGITS = re.compile(r'\d+')


def differing_segments(url: str, other_url: str) -> list:
    """Return the segments of *url* that do not occur in *other_url*.

    Segments are the pieces obtained by splitting on ``/``, ``|``, ``&`` and
    ``?``. The result keeps the order in which the segments appear in *url*
    and contains each segment once, so it is deterministic (a plain set
    difference is not, because string hashing is randomized per process).
    """
    other_segments = set(_SEGMENT_SEP.split(other_url))
    unique = (s for s in _SEGMENT_SEP.split(url) if s not in other_segments)
    # dict.fromkeys removes duplicates while preserving first-seen order.
    return list(dict.fromkeys(unique))


def split_around_page_number(main_url: str,
                             next_page_url: str) -> "tuple[str, str] | None":
    """Split *next_page_url* around the digit run that marks its page number.

    The page number is the first digit run, scanning *next_page_url* left to
    right, that lies in a segment absent from *main_url* and that is not also
    a digit run of one of *main_url*'s own unique segments.

    Returns:
        ``(prefix, suffix)`` - the text before and after the digit run - or
        ``None`` when no such digit run exists.
    """
    main_segments = set(_SEGMENT_SEP.split(main_url))
    # Digit runs that belong to the main URL's unique segments; a number that
    # also appears there does not distinguish the next page.
    main_digits = {
        digits
        for segment in differing_segments(main_url, next_page_url)
        for digits in _DIGITS.findall(segment)
    }
    for segment in _SEGMENT.finditer(next_page_url):
        if segment.group() in main_segments:
            continue
        # Search only inside this segment's span so the offsets refer to the
        # segment itself, not to an earlier occurrence of the same text.
        for digits in _DIGITS.finditer(next_page_url,
                                       segment.start(), segment.end()):
            if digits.group() not in main_digits:
                return (next_page_url[:digits.start()],
                        next_page_url[digits.end():])
    return None


if __name__ == "__main__":
    main_url = "www.baidu.com/343/acb/36.psp"
    next_page_url = "www.baidu.com/343/ab/1.html"
    print(re.split(r'(\d+)', main_url))
    print(differing_segments(next_page_url, main_url))
    print(differing_segments(main_url, next_page_url))

    parts = split_around_page_number(main_url, next_page_url)
    if parts is None:
        print("no page number found in", next_page_url)
    else:
        first_part, second_part = parts
        print(first_part, second_part)
|