Chapter 6: LangChain

What is LangChain?

Service Architecture

Modules

Installation

pip install langchain

LLM Example

!pip install langchain
!pip install openai

import os
os.environ["OPENAI_API_KEY"] = "APIKEY"

from langchain.llms import OpenAI
llm = OpenAI(temperature=0.9)

print(llm("컴퓨터 게임을 만드는 새로운 한국어 회사명을 하나 제안해 주세요."); //비바게임즈

Prompt Template Example

from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

prompt = PromptTemplate(
	input_variables=["product"],
	template="{product}을 만드는 새로운 한국어 회사명을 하나 제안해 주세요.",
)

print(prompt.format(product="home robots"))
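# format() only substitutes the variables; this prints:
# "Suggest one new Korean company name for a maker of home robots."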

Using Chains

from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
	input_variables=["product"],
	template="{product}을 만드는 새로운 한국어 회사명을 하나 제안해 주세요.",
)

chain = LLMChain(
	llm=OpenAI(temperature=0.9),
	prompt=prompt
)

# run() can be used when the chain has a single input and a single output.
chain.run("home robots")

# With multiple inputs or outputs, call the chain directly with a dict.
chain({"product": "home robots"})
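
Chains can also be composed. As a minimal sketch (not from the original text), SimpleSequentialChain pipes the single output of one LLMChain into the next; the slogan chain below is a hypothetical second step.

from langchain.chains import SimpleSequentialChain

# Hypothetical second chain: write a slogan for the company name
# produced by the first chain.
slogan_prompt = PromptTemplate(
	input_variables=["company"],
	template="Write a one-line slogan for a company named {company}.",
)
slogan_chain = LLMChain(llm=OpenAI(temperature=0.9), prompt=slogan_prompt)

# Each chain's single output feeds the next chain's single input.
overall_chain = SimpleSequentialChain(chains=[chain, slogan_chain], verbose=True)
overall_chain.run("home robots")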

Using Agents and Tools

!pip install google-search-results

import os
os.environ["SERPAPI_API_KEY"] = "SERPAPI_KEY"

from langchain.agents import load_tools
from langchain.llms import OpenAI

tools = load_tools(
	["serpapi", "llm-math"],
	llm=OpenAI(temperature=0)
)

from langchain.agents import initialize_agent

agent = initialize_agent(
	agent="zero-shot-react-description",
	llm=OpenAI(temperature=0),
	tools=tools,
	verbose=True
)

# The LLM reads the question and decides to use the llm-math tool.
agent.run("Calculate 123*4 with the calculator.")

# The LLM reads the question and decides to use SerpAPI.
agent.run("Check today's weather in Seoul, Korea with a web search.")

Memory Example

from langchain.chains import ConversationChain
from langchain.llms import OpenAI

chain = ConversationChain(
	llm=OpenAI(temperature=0),
	verbose=True
)
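
ConversationChain attaches a ConversationBufferMemory by default, so each call's prompt includes the earlier turns. A minimal usage sketch (the name is illustrative):

# The default ConversationBufferMemory replays the first exchange
# into the prompt of the second call.
chain.predict(input="Hi, my name is Minsu.")
chain.predict(input="What is my name?")  # answerable from memory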

Use cases

https://python.langchain.com/docs/use_cases

LangServe

Features

Installation

pip install "langserve[all]"

Example

# Server
"""Example LangChain server exposes multiple runnables (LLMs in this case)."""

from fastapi import FastAPI
from langchain.chat_models import ChatAnthropic, ChatOpenAI

from langserve import add_routes

app = FastAPI(
    title="LangChain Server",
    version="1.0",
    description="Spin up a simple api server using Langchain's Runnable interfaces",
)

add_routes(
    app,
    ChatOpenAI(),
    path="/openai",
)
add_routes(
    app,
    ChatAnthropic(),
    path="/anthropic",
)

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="localhost", port=8000)
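
Assuming the server code above is saved as server.py (an illustrative filename), running python server.py starts the API. Each runnable is then reachable under its path (for example http://localhost:8000/openai), and LangServe also serves an interactive playground at paths like /openai/playground.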
# Client
from langchain.prompts.chat import ChatPromptTemplate
from langserve import RemoteRunnable

openai_llm = RemoteRunnable("http://localhost:8000/openai/")
anthropic = RemoteRunnable("http://localhost:8000/anthropic/")

# We can use either LLM
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            "You are a highly educated person who loves to use big words. "
            + "You are also concise. Never answer in more than three sentences.",
        ),
        ("human", "Tell me about your favorite novel"),
    ]
).format_messages()

# AIMessage(content=" My favorite novel is Moby Dick by Herman Melville. The intricate plot and rich symbolism make it a complex and rewarding read. Melville's masterful prose vividly evokes the perilous life of whalers on 19th century ships.", additional_kwargs={}, example=False)
anthropic.invoke(prompt)

# My favorite novel is Moby-Dick by Herman Melville. The epic tale of Captain Ahab's quest to find and destroy the great white whale is a masterwork of American literature. Melville's dense, philosophical prose and digressive storytelling style make the novel a uniquely challenging and rewarding read.
for chunk in anthropic.stream(prompt):
    print(chunk.content, end="", flush=True)

# My favorite novel is The Art of Language by Maximo Quilana. It is a philosophical treatise on the beauty and complexity of human speech. The prose is elegant yet precise.
async for chunk in anthropic.astream(prompt):
    print(chunk.content, end="", flush=True)

# As with regular runnables, async invoke, batch and async batch variants are available by default
openai_llm.invoke(prompt)
# AIMessage(content='My favorite novel is "Ulysses" by James Joyce. It\'s a complex and innovative work that explores the intricacies of human consciousness and the challenges of modernity in a highly poetic and experimental manner. The prose is richly layered and rewards careful reading.', additional_kwargs={}, example=False)
await openai_llm.ainvoke(prompt)

#[AIMessage(content=" My favorite novel is Moby Dick by Herman Melville. The epic tale of Captain Ahab's obsessive quest to kill the great white whale is a profound meditation on man's struggle against nature. Melville's poetic language immerses the reader in the mysticism of the high seas.", additional_kwargs={}, example=False),
# AIMessage(content=" My favorite novel is Moby Dick by Herman Melville. The intricate details of whaling, though tedious at times, serve to heighten the symbolism and tension leading to the epic battle between Captain Ahab and the elusive white whale. Melville's sublime yet economical prose immerses the reader in a turbulent seascape teeming with meaning.", additional_kwargs={}, example=False)]
anthropic.batch([prompt, prompt])

# Streaming is available by default

# [AIMessage(content=' Here is a concise description of my favorite novel in three sentences:\n\nMy favorite novel is Moby Dick by Herman Melville. It is the epic saga of the obsessed Captain Ahab pursuing the white whale that crippled him through the seas. The novel explores deep philosophical questions through rich symbols and metaphors.', additional_kwargs={}, example=False),
# AIMessage(content=" My favorite novel is Moby Dick by Herman Melville. The epic tale of Captain Ahab's obsessive quest for the great white whale is a masterpiece of American literature. Melville's writing beautifully evokes the mystery and danger of the high seas.", additional_kwargs={}, example=False)]
await anthropic.abatch([prompt, prompt])

from langchain.schema.runnable import RunnablePassthrough
comedian_chain = (
    ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a comedian that sometimes tells funny jokes and other times you just state facts that are not funny. Please either tell a joke or state fact now but only output one.",
            ),
        ]
    )
    | openai_llm
)

joke_classifier_chain = (
    ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "Please determine if the joke is funny. Say `funny` if it's funny and `not funny` if not funny. Then repeat the first five words of the joke for reference...",
            ),
            ("human", "{joke}"),
        ]
    )
    | anthropic
)

chain = {"joke": comedian_chain} | RunnablePassthrough.assign(
    classification=joke_classifier_chain
)

# {'joke': AIMessage(content="Why don't scientists trust atoms?\n\nBecause they make up everything!", additional_kwargs={}, example=False),
# 'classification': AIMessage(content=" not funny\nWhy don't scientists trust atoms?", additional_kwargs={}, example=False)}
chain.invoke({})
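
Here the dict {"joke": comedian_chain} is coerced into a runnable that produces a {"joke": ...} mapping, and RunnablePassthrough.assign passes that mapping through unchanged while adding a classification key computed by joke_classifier_chain; that is why the final output contains both keys.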

LangSmith

Fine-tuning LLMs with LangSmith and Lilac

https://blog.langchain.dev/fine-tune-your-llms-with-langsmith-and-lilac/

OpenAI Fine-Tuning

from langsmith import Client

client = Client()

import datetime

project_name = "default"
run_type = "llm"
end_time = datetime.datetime.now()

# Fetch all successful LLM runs from the project.
runs = client.list_runs(
        project_name=project_name,
        run_type=run_type,
        error=False,
)

from langchain import chains, chat_models, prompts, schema, callbacks

chain = (
    prompts.ChatPromptTemplate.from_template("Tell a joke for:\n{input}")
    | chat_models.ChatAnthropic(tags=['my-anthropic-run'])
    | schema.output_parser.StrOutputParser()
)

with callbacks.collect_runs() as cb:
    chain.invoke({"input": "foo"})
    # Assume feedback is logged
    run = cb.traced_runs[0]
    client.create_feedback(run.id, key="user_click", score=1)

project_name = "default"
end_time = datetime.datetime.now()

# Re-query, keeping only root runs that received positive "user_click" feedback.
runs = client.list_runs(
        project_name=project_name,
        execution_order=1,
        filter='and(eq(feedback_key, "user_click"), eq(feedback_score, 1))',
        # For continuous scores, you can filter for >, <, >=, <= with the following arguments: gt/lt/gte/lte(feedback_score, 0.9)
        # filter='and(eq(feedback_key, "user_click"), gt(feedback_score, 0.9))',
        error=False,
)

llm_runs = []
for run in runs:
    llm_run = next(client.list_runs(project_name=project_name, run_type="llm", parent_run_id=run.id))
    llm_runs.append(llm_run)

llm_runs[0].tags

# For any "Chain" object, you can add tags directly on the Example with LLMChain
import uuid

unique_tag = f"call:{uuid.uuid4()}"

chain = chains.LLMChain(
        llm=chat_models.ChatAnthropic(tags=['my-cool-llm-tag']),  # This tag will only be applied to the LLM
        prompt=prompts.ChatPromptTemplate.from_template("Tell a joke based on the following prompt:\n\nPrompt:{input}"),
        tags=["my-tag"]
)

# You can also define at call time for the call/invoke/batch methods.
# This tag will be propagated to all child calls
print(chain({"input": "podcasting these days"}, tags=[unique_tag]))

# If you're defining using Runnables (aka langchain expression language)
runnable = (
    prompts.ChatPromptTemplate.from_template("Tell a joke based on the following prompt:\n\nPrompt:{input}")
    | chat_models.ChatAnthropic(tags=['my-cool-llm-tag']) # This tag will only be applied to the LLM
    | schema.StrOutputParser(tags=['some-parser-tag'])
)

# Again, you can tag at call time as well. This tag will be propagated to all child calls
print(runnable.invoke({"input": "podcasting these days"}, {"tags": [unique_tag]}))

project_name = "default"
end_time = datetime.datetime.now()

runs = client.list_runs(
        execution_order=1, # Only return the root trace
        filter=f'has(tags, "{unique_tag}")',
)
len(list(runs))

project_name = "default"
run_type = "llm"
end_time = datetime.datetime.now()

runs = client.list_runs(
        project_name=project_name,
        run_type=run_type,
        filter='eq(name, "ChatAnthropic")',
        error=False,
)

# Example chain for the following query
from langchain import prompts, chat_models

chain = (
    prompts.ChatPromptTemplate.from_template(
        "Summarize the following chat log: {input}"
    )
    | chat_models.ChatOpenAI()
)

chain.invoke({"input": "hi there, hello...."})

import datetime

project_name = "default"
run_type = "prompt"
end_time = datetime.datetime.now()

runs = client.list_runs(
        project_name=project_name,
        run_type=run_type,
        end_time=end_time,
        error=False,
)

# You can then get a sibling LLM run by searching by parent_run_id and including other criteria
for prompt_run in runs:
    llm_run = next(client.list_runs(project_name=project_name, run_type="llm", parent_run_id=prompt_run.parent_run_id))
    inputs, outputs = prompt_run.inputs, llm_run.outputs

# Create a chat dataset and add the selected runs as examples.
dataset = client.create_dataset(
    dataset_name="Fine-Tuning Dataset Example",
    description=f"Chat logs taken from project {project_name} for fine-tuning",
    data_type="chat",
)
for run in runs:
    if 'messages' not in run.inputs or not run.outputs:
        # Filter out non chat runs
        continue
    try:
        # Convenience method for creating a chat example
        client.create_example_from_run(
            dataset_id=dataset.id,
            run=run,
        )
        # Or if you want to select certain keys/values in inputs
        # inputs = convert_inputs(run.inputs)
        # outputs = convert_outputs(run.outputs)
        # client.create_example(
        #     dataset_id=dataset.id,
        #     inputs=inputs,
        #     outputs=outputs,
        #     run=run,
        # )
    except Exception:
        # Duplicate inputs raise an exception
        pass

from langsmith import schemas
from langchain import load

def convert_messages(example: schemas.Example) -> dict:
    messages = load.load(example.inputs)['messages']
    message_chunk = load.load(example.outputs)['generations'][0]['message']
    return {"messages": messages + [message_chunk]}

messages = [
    convert_messages(example)
    for example in client.list_examples(dataset_name="Fine-Tuning Dataset Example")
]

from langchain.adapters import openai as openai_adapter

finetuning_messages = openai_adapter.convert_messages_for_finetuning(messages)

import time
import json
import io

import openai

my_file = io.BytesIO()
for group in finetuning_messages:
    if any(["function_call" in message for message in group]):
        continue
    my_file.write((json.dumps({"messages": group}) + "\n").encode('utf-8'))

my_file.seek(0)
training_file = openai.File.create(
  file=my_file,
  purpose='fine-tune'
)

# Wait while the file is processed
status = openai.File.retrieve(training_file.id).status
start_time = time.time()
while status != "processed":
    print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\\r", flush=True)
    time.sleep(5)
    status = openai.File.retrieve(training_file.id).status
print(f"File {training_file.id} ready after {time.time() - start_time:.2f} seconds.")

job = openai.FineTuningJob.create(
    training_file=training_file.id,
    model="gpt-3.5-turbo",
)

# It may take 10-20+ minutes to complete training.
status = openai.FineTuningJob.retrieve(job.id).status
start_time = time.time()
while status != "succeeded":
    print(f"Status=[{status}]... {time.time() - start_time:.2f}s", end="\\r", flush=True)
    time.sleep(5)
    job = openai.FineTuningJob.retrieve(job.id)
    status = job.status

from langchain import chat_models, prompts

model_name = job.fine_tuned_model
# Example: ft:gpt-3.5-turbo-0613:personal::5mty86jblapsed
model = chat_models.ChatOpenAI(model=model_name)
# Query the fine-tuned model directly.
model.invoke("Who are you designed to assist?")

LangGraph
