Skip to content

Rayyan

RayyanReader #

Bases: BaseReader

Rayyan reader. Reads articles from a Rayyan review.

Parameters:

Name Type Description Default
credentials_path str

Path to the Rayyan credentials file.

required
rayyan_url str

Rayyan URL. Defaults to https://rayyan.ai. Set to an alternative URL if you are using a non-production Rayyan instance.

'https://rayyan.ai'
Source code in llama_index/readers/rayyan/base.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
class RayyanReader(BaseReader):
    """Rayyan reader. Loads articles from a Rayyan review.

    Args:
        credentials_path (str): Path to the Rayyan credentials file.
        rayyan_url (str, optional): Rayyan URL. Defaults to https://rayyan.ai.
            Set to an alternative URL if you are using a non-production
            Rayyan instance.
    """

    def __init__(
        self, credentials_path: str, rayyan_url: str = "https://rayyan.ai"
    ) -> None:
        """Initialize the Rayyan reader and sign in with the given credentials."""
        # Imported lazily so importing this module does not require the
        # rayyan SDK until a reader is actually constructed.
        from rayyan import Rayyan
        from rayyan.user import User

        logging.debug("Initializing Rayyan reader...")
        self.rayyan = Rayyan(credentials_path, url=rayyan_url)
        # Fetch the signed-in user eagerly so bad credentials fail fast.
        user = User(self.rayyan).get_info()
        logging.info(f"Signed in successfully to Rayyan as: {user['displayName']}!")

    def load_data(self, review_id: str, filters: dict = {}) -> List[Document]:
        """Load articles from a review.

        Args:
            review_id (str): Rayyan review ID.
            filters (dict, optional): Filters to apply to the review.
                Defaults to no filters. Passed as-is to the Rayyan review
                results method. ``None`` is accepted and treated as empty.

        Returns:
            List[Document]: A list of documents, one per article that has
            a title and/or an abstract.
        """
        from tenacity import (
            retry,
            stop_after_attempt,
            stop_after_delay,
            stop_all,
            wait_random_exponential,
        )
        from tqdm import tqdm

        from rayyan.review import Review

        rayyan_review = Review(self.rayyan)
        my_review = rayyan_review.get(review_id)
        logging.info(
            f"Working on review: '{my_review['title']}' with {my_review['total_articles']} total articles."
        )

        # Fetch in pages of 100; "start" is advanced after each batch.
        result_params = {"start": 0, "length": 100}
        # Accept filters=None as well as the default; the shared {} default
        # is safe here because it is never mutated.
        result_params.update(filters or {})

        # NOTE(review): tenacity's stop_all stops only when BOTH conditions
        # hold (>=3 attempts AND >=30s elapsed). If "whichever comes first"
        # was intended, stop_any would be the right combinator — confirm.
        @retry(
            wait=wait_random_exponential(min=1, max=10),
            stop=stop_all(stop_after_attempt(3), stop_after_delay(30)),
        )
        def fetch_results_with_retry():
            logging.debug("Fetch parameters: %s", result_params)
            return rayyan_review.results(review_id, result_params)

        articles = []
        logging.info("Fetching articles from Rayyan...")
        total = my_review["total_articles"]
        with tqdm(total=total) as pbar:
            while len(articles) < total:
                # Retrieve articles in batches.
                review_results = fetch_results_with_retry()
                fetched_articles = review_results["data"]
                # Guard against an infinite loop if the server returns an
                # empty page before `total` articles have been collected.
                if not fetched_articles:
                    break
                articles.extend(fetched_articles)
                # Update the total in case filters reduced the result set.
                if total != review_results["recordsFiltered"]:
                    total = review_results["recordsFiltered"]
                    pbar.total = total
                result_params["start"] += len(fetched_articles)
                pbar.update(len(fetched_articles))

        results = []
        for article in articles:
            # Join all abstracts into one string, capped at 1024 characters.
            abstracts = ""
            if article["abstracts"] is not None:
                abstracts_arr = [
                    abstract["content"] for abstract in article["abstracts"]
                ]
                if len(abstracts_arr) > 0:
                    abstracts = "\n".join(abstracts_arr)[0:1024].strip()
            title = article["title"]
            if title is not None:
                title = title.strip()
            # Use "" (not the literal string "None") when the title is missing.
            body = f"{title or ''}\n{abstracts}"
            # Skip articles with neither a title nor an abstract.
            if body.strip() == "":
                continue
            extra_info = {"id": article["id"], "title": title}

            results.append(
                Document(
                    text=body,
                    extra_info=extra_info,
                )
            )

        return results

load_data #

load_data(
    review_id: str, filters: dict = {}
) -> List[Document]

Load articles from a review.

Returns:

Type Description
List[Document]

List[Document]: A list of documents.

Source code in llama_index/readers/rayyan/base.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
    def load_data(self, review_id: str, filters: dict = {}) -> List[Document]:
        """Load articles from a review.

        Args:
            review_id (str): Rayyan review ID.
            filters (dict, optional): Filters to apply to the review.
                Defaults to no filters. Passed as-is to the Rayyan review
                results method. ``None`` is accepted and treated as empty.

        Returns:
            List[Document]: A list of documents, one per article that has
            a title and/or an abstract.
        """
        from tenacity import (
            retry,
            stop_after_attempt,
            stop_after_delay,
            stop_all,
            wait_random_exponential,
        )
        from tqdm import tqdm

        from rayyan.review import Review

        rayyan_review = Review(self.rayyan)
        my_review = rayyan_review.get(review_id)
        logging.info(
            f"Working on review: '{my_review['title']}' with {my_review['total_articles']} total articles."
        )

        # Fetch in pages of 100; "start" is advanced after each batch.
        result_params = {"start": 0, "length": 100}
        # Accept filters=None as well as the default; the shared {} default
        # is safe here because it is never mutated.
        result_params.update(filters or {})

        # NOTE(review): tenacity's stop_all stops only when BOTH conditions
        # hold (>=3 attempts AND >=30s elapsed). If "whichever comes first"
        # was intended, stop_any would be the right combinator — confirm.
        @retry(
            wait=wait_random_exponential(min=1, max=10),
            stop=stop_all(stop_after_attempt(3), stop_after_delay(30)),
        )
        def fetch_results_with_retry():
            logging.debug("Fetch parameters: %s", result_params)
            return rayyan_review.results(review_id, result_params)

        articles = []
        logging.info("Fetching articles from Rayyan...")
        total = my_review["total_articles"]
        with tqdm(total=total) as pbar:
            while len(articles) < total:
                # Retrieve articles in batches.
                review_results = fetch_results_with_retry()
                fetched_articles = review_results["data"]
                # Guard against an infinite loop if the server returns an
                # empty page before `total` articles have been collected.
                if not fetched_articles:
                    break
                articles.extend(fetched_articles)
                # Update the total in case filters reduced the result set.
                if total != review_results["recordsFiltered"]:
                    total = review_results["recordsFiltered"]
                    pbar.total = total
                result_params["start"] += len(fetched_articles)
                pbar.update(len(fetched_articles))

        results = []
        for article in articles:
            # Join all abstracts into one string, capped at 1024 characters.
            abstracts = ""
            if article["abstracts"] is not None:
                abstracts_arr = [
                    abstract["content"] for abstract in article["abstracts"]
                ]
                if len(abstracts_arr) > 0:
                    abstracts = "\n".join(abstracts_arr)[0:1024].strip()
            title = article["title"]
            if title is not None:
                title = title.strip()
            # Use "" (not the literal string "None") when the title is missing.
            body = f"{title or ''}\n{abstracts}"
            # Skip articles with neither a title nor an abstract.
            if body.strip() == "":
                continue
            extra_info = {"id": article["id"], "title": title}

            results.append(
                Document(
                    text=body,
                    extra_info=extra_info,
                )
            )

        return results