Source code for pytrials.client

from pytrials.utils import json_handler, csv_handler
from pytrials import study_fields
import csv



[docs]
class ClinicalTrials:
    """ClinicalTrials API client

    Provides functions to easily access the ClinicalTrials.gov API in Python.
    Every request is sent to the endpoint rooted at ``_BASE_URL``.

    Attributes:
        study_fields: Dict with the keys ``"csv"`` and ``"json"``, each mapping
            to the list of field names you can request in that output format.
        api_info: Tuple containing the API version number and the last
            time the database was updated.
    """

    _BASE_URL = "https://clinicaltrials.gov/api/v2/"
    _JSON = "format=json"
    _CSV = "format=csv"

    def __init__(self):
        # Queries the "version" endpoint once, at construction time.
        self.api_info = self.__api_info()

    @property
    def study_fields(self):
        """Dict of all study fields you can use in your query.

        The field table is re-read from the file referenced by the
        module-level ``study_fields`` name on every access.

        Returns:
            dict: ``{"csv": [...], "json": [...]}``. The ``"csv"`` list holds
            the "Column Name" of every row; the ``"json"`` list holds every
            entry of the pipe-separated "Included Data Fields" column,
            flattened across all rows.
        """
        csv_fields = []
        json_fields = []
        # newline="" lets the csv module do its own line-ending handling,
        # as recommended by the csv documentation.
        with open(study_fields, "r", newline="") as f:
            for row in csv.DictReader(f):
                csv_fields.append(row["Column Name"])
                json_fields.extend(row["Included Data Fields"].split("|"))

        return {"csv": csv_fields, "json": json_fields}

    def __api_info(self):
        """Returns information about the API

        Returns:
            tuple: ``(api_version, last_updated)`` taken from the
            ``apiVersion`` and ``dataTimestamp`` keys of the "version"
            endpoint response.
        """
        req = json_handler(f"{self._BASE_URL}version")
        last_updated = req["dataTimestamp"]
        api_version = req["apiVersion"]

        return api_version, last_updated

    def _format_param(self, fmt):
        """Return the ``format=...`` query parameter for ``fmt``.

        Raises:
            ValueError: If ``fmt`` is neither ``'csv'`` nor ``'json'``.
        """
        if fmt == "csv":
            return self._CSV
        if fmt == "json":
            return self._JSON
        raise ValueError("Format argument has to be either 'csv' or 'json'")

    @staticmethod
    def _check_max_studies(max_studies):
        """Raise ``ValueError`` unless ``max_studies`` lies within 1..1000."""
        if max_studies > 1000 or max_studies < 1:
            raise ValueError("The number of studies can only be between 1 and 1000")

    @staticmethod
    def _fetch(fmt, url):
        """Request ``url`` with the handler matching an already validated ``fmt``."""
        handler = csv_handler if fmt == "csv" else json_handler
        return handler(url)

    def get_full_studies(self, search_expr, max_studies=50, fmt="csv"):
        """Returns all content for a maximum of 1000 study records.

        Queries the studies endpoint without restricting the returned fields.

        Args:
            search_expr (str): A string containing a search expression as specified by
                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.
                It is inserted into the request URL as given, without any
                additional escaping.
            max_studies (int): An integer indicating the maximum number of studies to return.
                Defaults to 50.
            fmt (str): A string indicating the output format, csv or json. Defaults to csv.

        Returns:
            The value produced by the handler for the chosen format
            (``csv_handler`` for ``'csv'``, ``json_handler`` for ``'json'``).

        Raises:
            ValueError: Format argument has to be either 'csv' or 'json'
            ValueError: The number of studies can only be between 1 and 1000
        """
        # The format is validated first so that it takes precedence when
        # several arguments are invalid at once.
        format_param = self._format_param(fmt)
        self._check_max_studies(max_studies)

        req = f"studies?{format_param}&markupFormat=legacy&query.term={search_expr}&pageSize={max_studies}"

        return self._fetch(fmt, f"{self._BASE_URL}{req}")

    def get_study_fields(
        self, search_expr, fields, max_studies=50, fmt="csv"
    ):
        """Returns study content for specified fields

        Queries the studies endpoint for the requested fields of up to 1000
        studies. To see a list of all possible fields, check the class'
        study_fields attribute.

        Args:
            search_expr (str): A string containing a search expression as specified by
                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.
                It is inserted into the request URL as given, without any
                additional escaping.
            fields (list(str)): A list containing the desired information fields.
                Each one has to be listed in ``study_fields[fmt]``.
            max_studies (int): An integer indicating the maximum number of studies to return.
                Defaults to 50.
            fmt (str): A string indicating the output format, csv or json. Defaults to csv.

        Returns:
            Either a dict, if fmt='json', or a list of records (e.g. a list of lists), if fmt='csv'.
            Both containing the maximum number of study fields queried using the specified search expression.

        Raises:
            ValueError: Format argument has to be either 'csv' or 'json'
            ValueError: The number of studies can only be between 1 and 1000
            ValueError: One of the fields is not valid! Check the study_fields attribute
                for a list of valid ones.
        """
        # Checks run in this order: format, study count, then field names.
        format_param = self._format_param(fmt)
        self._check_max_studies(max_studies)

        if not set(fields).issubset(self.study_fields[fmt]):
            raise ValueError(
                "One of the fields is not valid! "
                "Check the study_fields attribute for a list of valid ones. "
                "They are different depending on the return format, json or csv."
            )

        concat_fields = "|".join(fields)
        req = f"&query.term={search_expr}&markupFormat=legacy&fields={concat_fields}&pageSize={max_studies}"
        url = f"{self._BASE_URL}studies?{format_param}{req}"
        return self._fetch(fmt, url)

    def __repr__(self):
        return f"ClinicalTrials.gov client v{self.api_info[0]}, database last updated {self.api_info[1]}"
Source code for pytrials.client

Pytrials

Navigation

Related Topics