Source code for pytrials.client
from pytrials.utils import json_handler, csv_handler
from pytrials import study_fields
import csv
[docs]
class ClinicalTrials:
    """ClinicalTrials API client.

    Provides functions to easily access the ClinicalTrials.gov API in Python.
    Every request is sent to the endpoint stored in ``_BASE_URL``.

    Attributes:
        study_fields: Dict of the study fields you can use in your query,
            keyed by output format (``"csv"`` and ``"json"``).
        api_info: Tuple containing the API version number and the last
            time the database was updated.
    """

    _BASE_URL = "https://clinicaltrials.gov/api/v2/"
    _JSON = "format=json"
    _CSV = "format=csv"

    def __init__(self):
        # Queries the API's ``version`` resource once, at construction time.
        self.api_info = self.__api_info()

    @property
    def study_fields(self):
        """Study fields you can use in your query, grouped by output format.

        The field list is re-read from the CSV file on every access.

        Returns:
            dict: ``{"csv": [...], "json": [...]}``. The ``"csv"`` list holds
            each row's ``Column Name``; the ``"json"`` list holds every
            ``|``-separated entry of each row's ``Included Data Fields``.
        """
        csv_fields = []
        json_fields = []
        # ``study_fields`` below is the module-level import (the value handed
        # to ``open``), not this property: class-level names are not visible
        # from inside a method body.
        # ``newline=""`` is what the csv module asks for when opening files.
        with open(study_fields, "r", newline="") as f:
            for row in csv.DictReader(f):
                csv_fields.append(row["Column Name"])
                json_fields.extend(row["Included Data Fields"].split("|"))
        return {"csv": csv_fields, "json": json_fields}

    def __api_info(self):
        """Return the API version and the database's last-update timestamp.

        Returns:
            tuple: ``(api_version, last_updated)``, taken from the
            ``apiVersion`` and ``dataTimestamp`` keys of the ``version``
            response.
        """
        req = json_handler(f"{self._BASE_URL}version")
        return req["apiVersion"], req["dataTimestamp"]

    @staticmethod
    def _validate_request(fmt, max_studies):
        """Raise ``ValueError`` unless *fmt* and *max_studies* are acceptable.

        The format is checked first, so an invalid format is the error that
        gets reported when both arguments are wrong.
        """
        if fmt not in ("csv", "json"):
            raise ValueError("Format argument has to be either 'csv' or 'json'")
        if max_studies > 1000 or max_studies < 1:
            raise ValueError("The number of studies can only be between 1 and 1000")

    def _format_and_handler(self, fmt):
        """Return the ``format=...`` query fragment and the handler for *fmt*.

        Expects *fmt* to have been validated already; anything other than
        ``"csv"`` is treated as ``"json"``.
        """
        if fmt == "csv":
            return self._CSV, csv_handler
        return self._JSON, json_handler

    def get_full_studies(self, search_expr, max_studies=50, fmt="csv"):
        """Returns all content for a maximum of 1000 study records.

        Queries the ``studies`` endpoint without restricting the returned
        fields.

        Args:
            search_expr (str): A string containing a search expression. It is
                sent as the ``query.term`` parameter.
                NOTE(review): the expression is interpolated into the URL
                as-is; confirm whether the handlers escape it or whether
                callers must do so.
            max_studies (int): An integer indicating the maximum number of
                studies to return (sent as ``pageSize``). Defaults to 50.
            fmt (str): A string indicating the output format, csv or json.
                Defaults to csv.

        Returns:
            Whatever the handler for the chosen format returns:
            ``json_handler`` for ``fmt='json'``, ``csv_handler`` for
            ``fmt='csv'``.

        Raises:
            ValueError: Format argument has to be either 'csv' or 'json'
            ValueError: The number of studies can only be between 1 and 1000
        """
        self._validate_request(fmt, max_studies)
        format_param, handler = self._format_and_handler(fmt)

        req = (
            f"studies?{format_param}&markupFormat=legacy"
            f"&query.term={search_expr}&pageSize={max_studies}"
        )
        return handler(f"{self._BASE_URL}{req}")

    def get_study_fields(
        self, search_expr, fields, max_studies=50, fmt="csv"
    ):
        """Returns study content for specified fields.

        Queries the ``studies`` endpoint for a maximum of 1000 studies,
        restricted to the requested fields. To see a list of all possible
        fields, check the class' study_fields attribute.

        Args:
            search_expr (str): A string containing a search expression. It is
                sent as the ``query.term`` parameter.
                NOTE(review): the expression is interpolated into the URL
                as-is; confirm whether the handlers escape it or whether
                callers must do so.
            fields (list(str)): A list containing the desired information
                fields. Valid names depend on ``fmt``.
            max_studies (int): An integer indicating the maximum number of
                studies to return (sent as ``pageSize``). Defaults to 50.
            fmt (str): A string indicating the output format, csv or json.
                Defaults to csv.

        Returns:
            Either a dict, if fmt='json', or a list of records (e.g. a list
            of lists), if fmt='csv'. Both contain the study fields queried
            using the specified search expression.

        Raises:
            ValueError: Format argument has to be either 'csv' or 'json'
            ValueError: The number of studies can only be between 1 and 1000
            ValueError: One of the fields is not valid! Check the study_fields
                attribute for a list of valid ones.
        """
        self._validate_request(fmt, max_studies)
        if not set(fields).issubset(self.study_fields[fmt]):
            # Trailing spaces keep the concatenated sentences apart.
            raise ValueError(
                "One of the fields is not valid! "
                "Check the study_fields attribute for a list of valid ones. "
                "They are different depending on the return format, json or csv."
            )
        format_param, handler = self._format_and_handler(fmt)

        concat_fields = "|".join(fields)
        req = (
            f"&query.term={search_expr}&markupFormat=legacy"
            f"&fields={concat_fields}&pageSize={max_studies}"
        )
        return handler(f"{self._BASE_URL}studies?{format_param}{req}")

    def __repr__(self):
        return (
            f"ClinicalTrials.gov client v{self.api_info[0]}, "
            f"database last updated {self.api_info[1]}"
        )