Skip to content

Gemini-CLI

pypdf_helper

Gemini-Ai-Streamlit

Gemini-CLI

Gemini-Ai-Streamlit

Home
Commands
Reference
Reference
- gemini-cli
  gemini-cli
  - export
    
    export
    
    ExportDocx
  - gemini
    
    gemini
    
    gemini
    
    youtube_transcript
  - gemini-cli
  - pdf
    
    pdf
    
    pypdf_helper pypdf_helper
    Table of contents
    
    pypdf_helper
    
    PyPdfHelper
    
    __init__
    
    get_text
  - version

pypdf_helper

`PyPdfHelper`

Source code in src/gemini-cli/pdf/pypdf_helper.py

class PyPdfHelper:
    """Helper that extracts text from a PDF file through a ``PdfReader``."""

    def __init__(self, path: str):
        """Initializes the PyPdfHelper class to read PDF files using PyPDF2 library

        Args:
            path (str): Path to the PDF file

        Raises:
            Exception: Any error raised while opening the file is logged
                and then re-raised unchanged.
        """
        try:
            self.path = path
            self.reader = PdfReader(path)
        except Exception as e:
            logging.error('Error reading PDF file: %s', e)
            raise

    def _extract_page_text(self, page_num: int) -> str:
        """Return the text of a single page, or "" if extraction fails.

        Args:
            page_num (int): Index into ``self.reader.pages``.

        Returns:
            str: Text of the page. Any exception (including an out-of-range
            index) is logged and replaced by an empty string so that one bad
            page does not abort a multi-page extraction.
        """
        try:
            return self.reader.pages[page_num].extract_text()
        except Exception as e:
            logging.error('Error extracting text from page %d: %s', page_num, e)
            return ""

    def get_text(self, start: int = 0, end: int = None) -> str:
        """Extracts text from the PDF file from the given start and end page numbers

        Args:
            start (int, optional): Start page index (inclusive). Defaults to 0.
            end (int, optional): End page index (exclusive). Defaults to None,
                which means "up to the last page".

        Returns:
            str: Text of pages ``start`` .. ``end - 1`` concatenated in page
            order. Pages whose extraction failed contribute an empty string.

        Raises:
            Exception: Any unexpected error is logged and re-raised.
        """
        try:
            # len(reader.pages) replaces the legacy getNumPages() call, which
            # current PyPDF2/pypdf releases no longer support; it also matches
            # the ``pages`` access used in _extract_page_text.
            end = end if end is not None else len(self.reader.pages)
            pages = range(start, end)

            # NOTE(review): the same reader is shared by all worker threads;
            # confirm PdfReader tolerates concurrent page access.
            with ThreadPoolExecutor() as executor:
                # Executor.map yields results in input order, so page order
                # is preserved in the joined text.
                texts = executor.map(self._extract_page_text, pages)

            return "".join(texts)
        except Exception as e:
            logging.error('Error reading PDF file: %s', e)
            raise

`__init__(path)`

Initializes the PyPdfHelper class to read PDF files using the PyPDF2 library.

Args:
- path (str): Path to the PDF file.

Source code in src/gemini-cli/pdf/pypdf_helper.py

def __init__(self, path: str):
    """Open the PDF at ``path`` with PyPDF2 and keep the reader on the instance.

    Args:
        path (str): Path to the PDF file

    Raises:
        Exception: Whatever the reader raised while opening the file; the
            error is logged first and then propagated unchanged.
    """
    # Remember the source path before attempting to open it.
    self.path = path
    try:
        self.reader = PdfReader(path)
    except Exception as err:
        logging.error('Error reading PDF file: %s', err)
        raise

`get_text(start=0, end=None)`

Extracts text from the PDF file between the given start and end page numbers.

Args:
- start (int, optional): Start page index. Defaults to 0.
- end (int, optional): End page index. Defaults to None.

Returns:
- str: Extracted text from the PDF file.

Source code in src/gemini-cli/pdf/pypdf_helper.py

def get_text(self, start: int = 0, end: int = None) -> str:
    """Extracts text from the PDF file from the given start and end page numbers

    Args:
        start (int, optional): Start page index (inclusive). Defaults to 0.
        end (int, optional): End page index (exclusive). Defaults to None,
            which means "up to the last page".

    Returns:
        str: Text of pages ``start`` .. ``end - 1`` concatenated in page order.

    Raises:
        Exception: Any unexpected error is logged and re-raised.
    """
    try:
        # len(reader.pages) replaces the legacy getNumPages() call, which
        # current PyPDF2/pypdf releases no longer support.
        end = end if end is not None else len(self.reader.pages)
        pages = range(start, end)

        # NOTE(review): the same reader is shared by all worker threads;
        # confirm PdfReader tolerates concurrent page access.
        with ThreadPoolExecutor() as executor:
            # Executor.map yields results in input order, so page order is
            # preserved in the joined text.
            texts = executor.map(self._extract_page_text, pages)

        return "".join(texts)
    except Exception as e:
        logging.error('Error reading PDF file: %s', e)
        raise