I am trying to write Python code that will download the POM file linked to a Java package hosted on the Maven repository.
I am able to perform a search using
# Search Maven Central for one exact group / artifact / version combination.
src = "g:javax.servlet a:javax.servlet-api v:4.0.1"
url = "https://search.maven.org/solrsearch/select?q="
response = requests.get(f"{url}{src}")
payload = response.json()
result = payload["response"]
# Keep only the first document of the result list.
match = result["docs"][0]
print(match)
but a JSON document is returned and no files are found inside it.
Even if I manage to get to this page:
Any idea?
This is what I used:
import requests
class Downlaoder:
    """Download the POM or JAR file of a Maven artefact from its G:A:V coordinates.

    The file URL is built on top of ``self.base`` and the file is written to
    the current working directory under its remote file name.
    """

    # Content-type fragments accepted by ``is_downloadable``:
    # XML for a POM file, java-archive for a JAR file.
    _TARGET_CONTENT_TYPES = ("text/xml", "application/java-archive")

    def __init__(self):
        self.base = "https://repo1.maven.org/maven2/"  # base url

    def download(self, g, a, v, extension="pom"):
        """
        Manage the different parts of the downloading process.

        :param g: groupID
        :param a: artefactID
        :param v: artefact version
        :param extension: set pom if you want to download the pom file,
                          set jar if you want to download the jar file
        :return: name of the local file the download was written to
        :raises SystemExit: (exit code 1) if the url is not a POM/JAR resource
                            or if the download fails
        """
        # The coordinates of the last request are kept on the instance.
        self.g = g
        self.a = a
        self.v = v
        url = self.gav_to_url(g, a, v, extension)  # create the url from the GAV format
        if not self.is_downloadable(url):
            print(f"The url = {url} is not a downloadable URL")
            # Same exception the builtin exit() raised, without relying on
            # the interactive helper injected by the ``site`` module.
            raise SystemExit(1)
        return self.perform_download(url)  # return the downloaded file name

    def gav_to_url(self, g, a, v, ext):
        """
        Create the JAR or POM file link from G:A:V coordinates.

        :param g: groupID
        :param a: artefactID
        :param v: artefact version
        :param ext: set pom if you want to create a link for the pom file,
                    set jar if you want to create a link for the jar file
        :return: url pointing to the desired file: jar or pom
        """
        # The dots of the group id become directory separators in the url.
        gid = g.replace(".", "/")
        return f"{self.base}{gid}/{a}/{v}/{a}-{v}.{ext}"

    def perform_download(self, url):
        """
        Download the file and store it in the current working directory.

        :param url: url of the file to be downloaded
        :return: name of the local file (last path segment of the url)
        :raises SystemExit: (exit code 1) on a network error, an HTTP error
                            status, or a failure to write the file
        """
        filename = url.split("/")[-1]
        try:
            response = requests.get(url, allow_redirects=True)
            # Do not store an HTTP error page as if it were the artefact.
            response.raise_for_status()
            # Overwrites the file in case it exists; ``with`` closes the handle.
            with open(filename, "wb") as out:
                out.write(response.content)
        except (requests.RequestException, OSError) as err:
            print(f"Something went wrong while downloading: {url} and the following exception was raised: {err}. Exiting! "
                  f"This operation is mandatory, exiting!")
            raise SystemExit(1) from err
        return filename

    def is_downloadable(self, url):
        """
        Tell whether the url points to a POM or JAR file.

        Only the response headers are examined (HEAD request); any other
        kind of resource is rejected.

        :param url: url of the file to be checked
        :return: True if the target is actually a POM or JAR file, False otherwise
        """
        h = requests.head(url, allow_redirects=True)
        content_type = h.headers.get("content-type")
        if self._is_pom_or_jar(content_type):
            return True
        # The header may be absent, so never call .lower() on None.
        shown = (content_type or "unknown content type").lower()
        print(f"this url doesn't point nor to a POM file neighter to a JAR file but to a {shown}")
        return False

    @staticmethod
    def _is_pom_or_jar(content_type):
        """Return True if the content-type header value denotes a POM or JAR file."""
        if not content_type:
            # Missing or empty header: nothing identifies the resource.
            return False
        lowered = content_type.lower()
        return any(kind in lowered for kind in Downlaoder._TARGET_CONTENT_TYPES)
You call it like this:
# POM / JAR downloader
jd = Downlaoder()
# No extension is passed, so the method's default is used.
pom_file_name = jd.download(g=g, a=a, v=v)