from typing import List


class Solution:
    def crawl(self, startUrl: str, htmlParser: 'HtmlParser') -> List[str]:
        def get_hostname(url):
            # split url by slashes
            # for instance, "http://example.org/foo/bar" is split into
            # "http:", "", "example.org", "foo", "bar"
            # the hostname is the element at index 2
            return url.split('/')[2]

        start_hostname = get_hostname(startUrl)
        visited = set()

        def dfs(url):
            visited.add(url)
            for next_url in htmlParser.getUrls(url):
                if get_hostname(next_url) == start_hostname and next_url not in visited:
                    dfs(next_url)

        dfs(startUrl)
        return list(visited)

The time complexity is O(E · L), where E is the number of edges in the graph and L is the maximum length of a URL (urls[i].length): each edge is examined once, and every hostname extraction and set operation costs O(L).
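On LeetCode the HtmlParser object is supplied by the judge, so trying the solution locally needs a stand-in. Here is a minimal sketch using a stub class backed by a hand-written link map; both the StubHtmlParser name and the sample graph are hypothetical, for illustration only:

from typing import Dict, List

class StubHtmlParser:
    # hypothetical stand-in for the judge-provided HtmlParser interface
    def __init__(self, links: Dict[str, List[str]]):
        self.links = links

    def getUrls(self, url: str) -> List[str]:
        # return the outgoing links of a page, or none if the page is unknown
        return self.links.get(url, [])

links = {
    "http://example.org/": ["http://example.org/foo", "http://other.com/"],
    "http://example.org/foo": ["http://example.org/"],
}
parser = StubHtmlParser(links)
print(sorted(Solution().crawl("http://example.org/", parser)))
# expected: ['http://example.org/', 'http://example.org/foo']; other.com is filtered out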
import collections
from typing import List


class Solution:
    def crawl(self, startUrl: str, htmlParser: 'HtmlParser') -> List[str]:
        def get_hostname(url):
            # split url by slashes
            # for instance, "http://example.org/foo/bar" is split into
            # "http:", "", "example.org", "foo", "bar"
            # the hostname is the element at index 2
            return url.split('/')[2]

        start_hostname = get_hostname(startUrl)
        q = collections.deque([startUrl])
        visited = {startUrl}

        while q:
            url = q.popleft()
            for next_url in htmlParser.getUrls(url):
                if get_hostname(next_url) == start_hostname and next_url not in visited:
                    q.append(next_url)
                    visited.add(next_url)
        return list(visited)

The time complexity is O(E · L) and the space complexity is O(N · L), where E is the number of edges in the graph, L is the maximum length of a URL (urls[i].length), and N is the total number of URLs (urls.length): every URL enters the queue at most once, and the queue and visited set together hold at most N strings of length up to L.
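The split('/')[2] trick works because every input URL has the scheme://host/path shape. A more defensive sketch swaps in the standard library's urllib.parse.urlparse, whose netloc field covers the host (and a port, if one were present); this substitution is an assumption, not part of the original solution:

from urllib.parse import urlparse

def get_hostname(url: str) -> str:
    # netloc is everything between "//" and the next "/",
    # e.g. urlparse("http://example.org/foo/bar").netloc == "example.org"
    return urlparse(url).netloc

assert get_hostname("http://example.org/foo/bar") == "example.org"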