Unable to read a CSV using AzureOPENAI and Langchain with create_csv_agent(AzureOpenAI())

Satya Ramadas Metla 15 Reputation points
2024-01-31T19:26:20.42+00:00

I am trying to create a BOT on top of csv file using AzureOPENAI (llm) and Langchain framework. But i am getting "UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 12062: invalid start byte" error when executed. Please refer my code snippet below and correct me if something is wrong

from langchain_experimental.agents.agent_toolkits.csv.base import create_csv_agent
from langchain_openai import AzureOpenAI
from dotenv import load_dotenv
import os
import streamlit as st

def main():
    load_dotenv()
    OPENAI_API_KEY = ""
    OPENAI_API_VERSION = "0301"
    
    os.environ["OPENAI_API_TYPE"] = "azure"
    os.environ["OPENAI_API_VERSION"] = "2023-05-15"
    os.environ["AZURE_OPENAI_ENDPOINT"] = "xxxx"
    os.environ["OPENAI_API_KEY"] = "xxxx"
    
    st.set_page_config(page_title="XXXXX")
    st.header("XXXXX ")
    
    llm = AzureOpenAI(
    deployment_name="name",
    model_name="gpt-3.5-turbo",
)
  
 
    
    agent = create_csv_agent(llm, 'Data.csv')
    user_question = st.text_input("Ask your question ")
    
    if user_question is not None and user_question != "":
            with st.spinner(text="In progress..."):
                st.write(agent.run(user_question))
    
if __name__ == "__main__":
    main()```

Error Message below:

UnicodeDecodeError: 'utf-8' codec can't decode byte 0x92 in position 12062: invalid start byte

Traceback:

File "C:\Program Files\Python39\lib\site-packages\streamlit\runtime\scriptrunner\script_runner.py", line 535, in _run_script
    exec(code, module.__dict__)
File "C:\UsersXXXmain.py", line 38, in <module>
    main()
File "C:\Users\XXXX\XXXX\main.py", line 27, in main
    agent = create_csv_agent(llm, 'Data.csv')
File "C:\Program Files\Python39\lib\site-packages\langchain_experimental\agents\agent_toolkits\csv\base.py", line 28, in create_csv_agent
    df = pd.read_csv(path, **_kwargs)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 1024, in read_csv
    return _read(filepath_or_buffer, kwds)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 618, in _read
    parser = TextFileReader(filepath_or_buffer, **kwds)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 1618, in __init__
    self._engine = self._make_engine(f, self.engine)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\readers.py", line 1896, in _make_engine
    return mapping[engine](f, **self.options)
File "C:\Program Files\Python39\lib\site-packages\pandas\io\parsers\c_parser_wrapper.py", line 93, in __init__
    self._reader = parsers.TextReader(src, **kwds)
File "parsers.pyx", line 574, in pandas._libs.parsers.TextReader.__cinit__
File "parsers.pyx", line 663, in pandas._libs.parsers.TextReader._get_header
File "parsers.pyx", line 874, in pandas._libs.parsers.TextReader._tokenize_rows
File "parsers.pyx", line 891, in pandas._libs.parsers.TextReader._check_tokenize_status
File "parsers.pyx", line 2053, in pandas._libs.parsers.raise_parser_error
Azure OpenAI Service
Azure OpenAI Service
An Azure service that provides access to OpenAI’s GPT-3 models with enterprise capabilities.
4,080 questions
0 comments No comments
{count} votes

2 answers

Sort by: Most helpful
  1. Anonymous
    2024-01-31T22:32:30.08+00:00

    There are known issues in older versions of pandas dealing with fancy quote (0x92). You can clean up the csv file, or try other codepages as per this SO thread: https://stackoverflow.com/questions/46000191/utf-8-codec-cant-decode-byte-0x92-in-position-18-invalid-start-byte


  2. Saurabh Sharma 23,846 Reputation points Microsoft Employee Moderator
    2024-02-13T23:25:06.9966667+00:00

    @Satya Ramadas Metla I am able to run it without any issues. User's image

    Can you please try with the below code if this works for you -

    from langchain_experimental.agents.agent_toolkits.csv.base import create_csv_agent
    from langchain_openai import AzureOpenAI
    from dotenv import load_dotenv
    import os
    import streamlit as st
    
    
    
    def main():
        load_dotenv()
        endpoint = os.getenv("AOAIEndpoint") 
        api_key = os.getenv("AOAIKey")
        api_version = "2023-09-15-preview"   
        deployment_name="test_chatgpt"
        model_name="gpt-35-turbo"
        
        st.set_page_config(page_title="Chat on CSV Data")
        st.header("Ask Questions about the CSV Data using Azure OpenAI!")
        
        llm = AzureOpenAI(azure_endpoint=endpoint, openai_api_key=api_key, api_version=api_version,
        deployment_name=deployment_name,
        model_name=model_name)
    
        agent = create_csv_agent(llm,  './Jupyter Notebooks/data/Sales_Performance_Report_DQLab_Store.csv')
        user_question = st.text_input("Ask your question here:")
    
        if user_question is not None and user_question != "":
                with st.spinner(text="In progress..."):
                    st.write(agent.run(user_question))
        
    if __name__ == "__main__":
        main()
    

    Please let me know if you have any other questions. Thanks Saurabh


Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.