Source code for nd2py.search.llmsr.api.gemini_api

# Copyright (c) 2024-present, Yumeow. Licensed under the MIT License.
import os
import logging
from typing import Generator, Tuple, List, Dict
from datetime import datetime, timezone, timedelta
from google import genai
from google.genai import types
from .llm_api import LLMAPI

_logger = logging.getLogger(__name__)



[docs]
class GeminiAPI(LLMAPI):
    supported_models = [
        "gemini-2.5-pro",
        "gemini-2.5-flash",
        "gemini-2.5-flash-lite-preview-06-17",
        "gemini-2.0-flash",
        "gemini-2.0-flash-lite",
    ]


[docs]
    def __init__(self, model='gemini-2.5-pro'):
        self.model = model


    def _request(self, messages: List|str, **kwargs) -> Generator[str, None, Dict]:
        ## Ensure this is a generator
        yield from []
        model = kwargs
        api_key = os.environ.get("GEMINI_API_KEY", None)
        os.environ["HTTP_PROXY"] = os.environ.get("MY_HTTP_PROXY", None)
        os.environ["HTTPS_PROXY"] = os.environ.get("MY_HTTPS_PROXY", None)
        config = types.GenerateContentConfig(
            candidate_count=self.generate_per_prompt,
            thinking_config=types.ThinkingConfig(
                thinking_budget=0,
                include_thoughts=True,
            ),
        )
        if self.model == "gemini-2.5-pro":
            prompt = prompt.replace(
                "Only return the `def get_equations(...)` function, put your thinkings and explanations in the docstring of the function or as comments in the code (between `def` and `return`).",
                "Only return the `def get_equations(...)` function and very brief explanations in the docstring, without complex thinkings or explanations.",
            )
            prompt = f"{prompt}\n\n(You are limited to thinking for 128 tokens.)"
            config.thinking_config.thinking_budget = 128
        if self.model in ["gemini-2.0-flash", "gemini-2.0-flash-lite"]:
            config.thinking_config = None
        client = genai.Client(api_key=api_key)
        try:
            response = client.models.generate_content(
                model=self.model,
                contents=prompt,
                config=config,
            )
        except Exception as e:
            model_list = self.supported_models
            idx = model_list.index(self.model)
            self.model = model_list[(idx + 1) % len(model_list)]
            _logger.error(
                f"Error requesting {self.model}, switching to {self.model}: {e}"
            )
            yield from []
            return
        finally:
            if self._model != self.model:
                # Check if the model has been used today
                now = datetime.now(timezone(timedelta(hours=-7)))
                date = now.strftime("%Y-%m-%d")
                if date not in self._date_list:
                    self._date_list.append(date)
                    self.model = self._model

        usage = response.usage_metadata
        total_tokens = usage.total_token_count
        prompt_tokens = usage.prompt_token_count
        reason_tokens = usage.thoughts_token_count or 0
        answer_tokens = total_tokens - prompt_tokens - reason_tokens
        usage = {
            "total": total_tokens,
            "prompt": prompt_tokens,
            "answer": answer_tokens,
            "reason": reason_tokens,
        }
        for candidate in response.candidates:
            for part in candidate.content.parts:
                if not part.text:
                    continue
                if part.thought:
                    continue
                else:
                    yield part.text, usage
                    break
            usage = {k: 0 for k in usage}  # Reset usage for next candidate
        return {"prompt": prompt, "response": response.to_json_dict()}