Skip to content
Snippets Groups Projects
Commit 75cc9994 authored by David Matthew Antonio's avatar David Matthew Antonio
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
secret.py
Stupid_credentials.txt
data/
chroma_langchain_db/
\ No newline at end of file
# Stock and LLM
I have a stooockk, i have an L L M, ugghhhh, penpineappleapplepen
## Requirements
**1. API KEYS**
The project requires API keys for EODHD and OpenAI to work; they are to be stored in a Python file named `secret.py`.
secret.py example:
```python
APIKEYS = {
'OPENAI':'sk-a7388ddffa0bc5fb5a8e4f2128cfaea336e885080fade2f45568ee36770982dd',
'EODHD':'c4134600f900d3d.5544920',
}
```
_Yes these keys are fake_
**2. Dependencies**
You also need some dependencies for **Python 3.12**, they can be installed with the included requirements.txt.
## Running the program
Before running the program, configurations can be made by accessing `config.py`.
You should also run the `initializer.py` file; this will collect data from the EODHD API.
Finally you can go ahead and run app.py to start the gradio app.
## Limitations
The only stock exchange available is the PSE; this is due to API limits.
Only monthly stock values are available; there's too much data if the period is daily.
app.py 0 → 100644
# from langchain_core.prompts import ChatPromptTemplate
# from langchain_openai import ChatOpenAI
# from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
import config
import interface
import initializer
def main():
    """Entry point: optionally refresh the cached API data, then build and
    launch the Gradio web app."""
    if config.INITIALIZE_ON_RUN:
        # Pull fresh exchange/ticker/price data from EODHD before serving.
        initializer.run()
    web_app = interface.initialize()
    web_app.launch(share=config.GRADIO_PUBLIC)


if __name__ == "__main__":
    main()
\ No newline at end of file
# ---------------------------------------------------------------------------
# Local data layout.  Paths are assembled by plain string concatenation, so
# every directory constant MUST keep its trailing '/'.
# ---------------------------------------------------------------------------
BASE_DATA_PATH = 'data/'

TICKERS_DIRECTORY = 'tickers/'
STOCKS_DIRECTORY = 'stocks/'
FUSED_DATA_DIRECTORY = 'fused/'

# File names may include a sub-path and must carry their extension.
EXCHANGES_FILEPATH = 'exchanges.json'

# Behaviour flags and model selection.
INITIALIZE_ON_RUN = True
GRADIO_PUBLIC = True
LLM_MODEL = "gpt-4o-mini"


def get_tickers_directory() -> str:
    """Directory holding the per-exchange ticker list files."""
    return f"{BASE_DATA_PATH}{TICKERS_DIRECTORY}"


def get_stocks_directory() -> str:
    """Directory holding the per-stock historical price files."""
    return f"{BASE_DATA_PATH}{STOCKS_DIRECTORY}"


def get_fused_data_directory() -> str:
    """Directory holding ticker+price fused data files."""
    return f"{BASE_DATA_PATH}{FUSED_DATA_DIRECTORY}"


def get_exchanges_filepath() -> str:
    """Path of the cached exchange-list JSON file."""
    return f"{BASE_DATA_PATH}{EXCHANGES_FILEPATH}"
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
import config
import os
# Resolve the OpenAI API key: prefer the local secret.py, fall back to the
# OPENAI environment variable (e.g. in deployments without the file).
try:
    import secret
    OPENAIKEY = secret.APIKEYS['OPENAI']
except (ImportError, KeyError):
    # Narrowed from a bare `except:` so unrelated bugs (typos, etc.) in
    # secret.py are not silently swallowed.
    OPENAIKEY = os.environ['OPENAI']
# The chat model every query is answered with; the model name is configured
# centrally in config.py (LLM_MODEL).
model = ChatOpenAI(model=config.LLM_MODEL, api_key = OPENAIKEY)

# Two-message prompt: a fixed system briefing describing the assistant's job,
# then a human turn carrying the retrieved context and the user's query —
# {context} and {query} are filled in by send_prompt below.
background_context = ChatPromptTemplate.from_messages([
    ("system", """You are an assistant specialized in stock data. You may receive human prompts asking for the price of a specific stock name or code.
Accompanying the human prompt will be context.
For questions specific about a certain stock coded, the context usually is a json formatted information about the stock, from its name, to the highs and lows in each date. Additionally accompanying the json data is the metadata.
Different questions that are more broad, such as what are the available stocks may return different contexts, such as a list of the stocks.
Should the context given not provide sufficient information, you may admit that you lack the knowledge in your response to the user prompt.
"""),
    ("human", "Context:{context}\n\n###\n\nQuery: {query}")])
def send_prompt(message: str, context: str):
    """Run one user query through the prompt template and the chat model.

    `message` is the raw user question, `context` the stringified retrieval
    results; returns the model's response message.
    """
    pipeline = background_context | model
    payload = {"query": message, "context": context}
    return pipeline.invoke(payload)
from langchain_chroma import Chroma
from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE, Settings
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from utility import stock_data_helper
from langchain_text_splitters import RecursiveJsonSplitter
import os
# Resolve the OpenAI API key: prefer the local secret.py, fall back to the
# OPENAI environment variable (e.g. in deployments without the file).
try:
    import secret
    OPENAIKEY = secret.APIKEYS['OPENAI']
except (ImportError, KeyError):
    # Narrowed from a bare `except:` so unrelated bugs (typos, etc.) in
    # secret.py are not silently swallowed.
    OPENAIKEY = os.environ['OPENAI']
def get_vectordb():
    """Build the Chroma vector store holding one document per PSE stock.

    Each ticker becomes one document (metadata header + raw price history),
    plus a single catch-all document listing every ticker so that broad
    questions ("what stocks are available?") retrieve it.

    Returns the populated Chroma vector store.
    """
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=OPENAIKEY)
    tickers = stock_data_helper.get_stock_codes("PSE")
    # BUG FIX: the old code also built an unused `stocks` list here, reading
    # every stock file a second time for nothing — removed.
    vector_store = Chroma(
        collection_name="Stocks",
        embedding_function=embeddings,
        # Where to save data locally; remove if persistence is not needed.
        persist_directory="./chroma_langchain_db",
    )
    print('Now Creating Documents')
    documents = []
    for tick in tickers:
        stock_data = stock_data_helper.get_stock_data("PSE", tick['Code'])
        documents.append(Document(
            page_content=f"""
METADATA:
Code: {tick['Code']},
Name: {tick['Name']},
Country: {tick['Country']}
!HISTORICAL DATA:
{str(stock_data)}
""",
            metadata={
                "Code": tick['Code'],
                "Name": tick['Name'],
                "Country": tick['Country'],
            },
        ))
    # Catch-all document for broad "list all stocks" questions.
    # BUG FIX: the old code embedded str(stock_data_helper.get_stock_codes) —
    # the repr of the FUNCTION object — instead of the ticker list itself.
    documents.append(Document(
        page_content=f"""
METADATA:
Name: "General/All stocks/tickers/companies in the philippines",
Country: "Philippines"
!HISTORICAL DATA:
{str(tickers)}
""",
        metadata={
            "Name": "General/All stocks/tickers/companies in the philippines",
        },
    ))
    print('Now adding documents to db')
    vector_store.add_documents(documents=documents)
    return vector_store
import datetime
import json
import os

import requests

import config

# Resolve the EODHD API key: prefer the local secret.py, fall back to the
# EODHD environment variable.
# BUG FIX: the old code also imported `secret` unconditionally before this
# try block, which crashed the whole module whenever secret.py was absent
# and made the environment-variable fallback below unreachable.
try:
    import secret
    EODHD_KEY = secret.APIKEYS['EODHD']
except (ImportError, KeyError):
    EODHD_KEY = os.environ['EODHD']

# Cache locations, resolved once from the central config module.
EXCHANGES_FILEPATH = config.get_exchanges_filepath()
TICKERS_DIRECTORY = config.get_tickers_directory()
STOCKS_DIRECTORY = config.get_stocks_directory()
def search_query(query: str):
    """Search EODHD for stocks matching `query`; returns the parsed JSON.

    On any request/decoding failure the error is printed and the process
    exits — this module treats API access as essential.
    """
    url = f'https://eodhd.com/api/search/{query}?api_token={EODHD_KEY}&fmt=json'
    try:
        # timeout= prevents an unresponsive API from hanging the app forever
        return requests.get(url, timeout=30).json()
    except Exception as e:
        print(e)
        exit()
def retrieve_exchanges():
    """Download the EODHD exchange list, cache it to EXCHANGES_FILEPATH and
    return it.

    On any request/decoding failure the error is printed and the process
    exits (consistent with the other retrieve_* helpers).
    """
    url = f'https://eodhd.com/api/exchanges-list/?api_token={EODHD_KEY}&fmt=json'
    try:
        # timeout= prevents an unresponsive API from hanging the app forever
        exchanges_list = requests.get(url, timeout=30).json()
    except Exception as e:
        print(e)
        exit()
    with open(EXCHANGES_FILEPATH, 'w') as filename:
        json.dump(exchanges_list, filename)
    return exchanges_list
def retrieve_tickers(exchange_code: str):
    """Download every ticker of `exchange_code`, cache the list to
    TICKERS_DIRECTORY/<exchange_code>.json and return it.

    On any request/decoding failure the error is printed and the process
    exits (consistent with the other retrieve_* helpers).
    """
    url = f'https://eodhd.com/api/exchange-symbol-list/{exchange_code}?api_token={EODHD_KEY}&fmt=json'
    try:
        # timeout= prevents an unresponsive API from hanging the app forever
        tickers_list = requests.get(url, timeout=30).json()
    except Exception as e:
        print(e)
        exit()
    # exist_ok avoids the check-then-create race of the old exists()/makedirs pair.
    os.makedirs(TICKERS_DIRECTORY, exist_ok=True)
    with open(TICKERS_DIRECTORY + f'{exchange_code}.json', 'w') as filename:
        json.dump(tickers_list, filename)
    return tickers_list
def retrieve_eod_historical_data(exchange_code: str, stock_code: str, period: int = 1):
    """Download one year of monthly EOD prices for one stock, cache them to
    STOCKS_DIRECTORY/<exchange>/<stock>.json and return the parsed list.

    `period` is kept for backward compatibility but is currently unused —
    the request is hard-coded to monthly data (period=m) to limit volume.
    On any request/decoding failure the error is printed and the process
    exits (consistent with the other retrieve_* helpers).
    """
    current_date = datetime.date.today()
    try:
        earlier_date = current_date.replace(year=current_date.year - 1)
    except ValueError:
        # BUG FIX: Feb 29 has no counterpart in the previous (non-leap)
        # year; the old code raised ValueError whenever run on a leap day.
        earlier_date = current_date.replace(year=current_date.year - 1, day=28)
    url = f'https://eodhd.com/api/eod/{stock_code}.{exchange_code}?from={earlier_date}&to={current_date}&period=m&api_token={EODHD_KEY}&fmt=json'
    try:
        # timeout= prevents an unresponsive API from hanging the app forever
        stock_prices_list = requests.get(url, timeout=30).json()
    except Exception as e:
        print(e)
        exit()
    # exist_ok avoids the check-then-create race of the old exists()/makedirs pair.
    os.makedirs(f"{STOCKS_DIRECTORY}{exchange_code}", exist_ok=True)
    with open(f"{STOCKS_DIRECTORY}{exchange_code}/{stock_code}.json", 'w') as filename:
        json.dump(stock_prices_list, filename)
    return stock_prices_list
\ No newline at end of file
from controllers import stocks
from utility import stock_data_helper
import multiprocessing as mp
def run():
    """Refresh all locally cached EODHD data: exchange list, PSE tickers,
    then one year of monthly prices for every PSE stock (in parallel)."""
    try:
        stocks.retrieve_exchanges()
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed SystemExit
        # and KeyboardInterrupt).  retrieve_exchanges prints the underlying
        # error itself before exiting on API failure.
        print("Cannot retrieve exchanges from the API")
        exit()
    else:
        print("Exchange list updated!")
    # Due to API limits, only Philippine (PSE) stocks are considered.
    try:
        tickers = stocks.retrieve_tickers("PSE")
    except Exception:
        print("Cannot retrieve tickers from the API")
        exit()
    else:
        print("Ticker list updated!")
    # Re-read the list from the cache file just written above.
    tickers = stock_data_helper.get_stock_codes("PSE")
    # One process per ticker — acceptable for the PSE's ticker count.
    # NOTE(review): consider mp.Pool if the exchange list ever grows large.
    processes = [mp.Process(target=stocks.retrieve_eod_historical_data, args=("PSE", i["Code"])) for i in tickers]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    # Fused-data generation is currently disabled; left here for reference.
    # TODO(review): confirm whether create_fused_data should be re-enabled.
    # try:
    #     stock_data_helper.create_fused_data("PSE")
    # except Exception as exception:
    #     print(exception)


if __name__ == "__main__":
    run()
\ No newline at end of file
import gradio as gr
from controllers import rag, prompt

# Built once at import time.  NOTE(review): this calls the OpenAI embeddings
# API and re-reads every cached stock file before the UI even starts —
# confirm this eager initialisation is intended.
vector_db = rag.get_vectordb()
def get_prompt_response(query: str):
    """Answer `query` via RAG: fetch the 10 most similar stock documents
    and pass them as context to the LLM; returns the response text."""
    retrieved_docs = vector_db.similarity_search(query, k=10)
    reply = prompt.send_prompt(query, str(retrieved_docs))
    return reply.content
def initialize():
    """Assemble and return the Gradio Blocks UI (does not launch it)."""
    with gr.Blocks() as app:
        gr.Markdown("# PSE Stock LLM App")
        gr.Markdown("> Only PSE Stocks are supported, theres a limit on the API. Also only monthly data is available, daily would be too much")
        query_box = gr.Textbox(label="Start Chatting:")
        submit_button = gr.Button("Send")
        response_box = gr.Textbox(label="Output", interactive=False)
        # Route the query through the RAG pipeline on every button click.
        submit_button.click(
            fn=get_prompt_response,
            inputs=[query_box],
            outputs=response_box,
        )
    return app
aiofiles==23.2.1
aiohappyeyeballs==2.4.3
aiohttp==3.10.8
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.6.0
asgiref==3.8.1
attrs==24.2.0
backoff==2.2.1
bcrypt==4.2.0
build==1.2.2
cachetools==5.5.0
certifi==2024.8.30
charset-normalizer==3.3.2
chroma-hnswlib==0.7.6
chromadb==0.5.11
click==8.1.7
coloredlogs==15.0.1
contourpy==1.3.0
cycler==0.12.1
dataclasses-json==0.6.7
Deprecated==1.2.14
distro==1.9.0
durationpy==0.9
fastapi==0.115.0
ffmpy==0.4.0
filelock==3.16.1
flatbuffers==24.3.25
fonttools==4.54.1
frozenlist==1.4.1
fsspec==2024.9.0
google-auth==2.35.0
googleapis-common-protos==1.65.0
gradio==4.44.1
gradio_client==1.3.0
greenlet==3.1.1
grpcio==1.66.2
h11==0.14.0
httpcore==1.0.6
httptools==0.6.1
httpx==0.27.2
huggingface-hub==0.25.1
humanfriendly==10.0
idna==3.10
importlib_metadata==8.4.0
importlib_resources==6.4.5
Jinja2==3.1.4
jiter==0.5.0
jsonpatch==1.33
jsonpointer==3.0.0
kiwisolver==1.4.7
kubernetes==31.0.0
langchain==0.3.1
langchain-chroma==0.1.4
langchain-community==0.3.1
langchain-core==0.3.7
langchain-openai==0.2.1
langchain-text-splitters==0.3.0
langsmith==0.1.129
lark==1.2.2
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.22.0
matplotlib==3.9.2
mdurl==0.1.2
mmh3==5.0.1
monotonic==1.6
mpmath==1.3.0
multidict==6.1.0
mypy-extensions==1.0.0
numpy==1.26.4
oauthlib==3.2.2
onnxruntime==1.19.2
openai==1.51.0
opentelemetry-api==1.27.0
opentelemetry-exporter-otlp-proto-common==1.27.0
opentelemetry-exporter-otlp-proto-grpc==1.27.0
opentelemetry-instrumentation==0.48b0
opentelemetry-instrumentation-asgi==0.48b0
opentelemetry-instrumentation-fastapi==0.48b0
opentelemetry-proto==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-semantic-conventions==0.48b0
opentelemetry-util-http==0.48b0
orjson==3.10.7
overrides==7.7.0
packaging==24.1
pandas==2.2.3
pillow==10.4.0
posthog==3.6.6
protobuf==4.25.5
pyasn1==0.6.1
pyasn1_modules==0.4.1
pydantic==2.9.2
pydantic-settings==2.5.2
pydantic_core==2.23.4
pydub==0.25.1
Pygments==2.18.0
pyparsing==3.1.4
PyPika==0.48.9
pyproject_hooks==1.2.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-multipart==0.0.12
pytz==2024.2
PyYAML==6.0.2
regex==2024.9.11
requests==2.32.3
requests-oauthlib==2.0.0
rich==13.9.1
rsa==4.9
ruff==0.6.8
semantic-version==2.10.0
setuptools==75.1.0
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
SQLAlchemy==2.0.35
starlette==0.38.6
sympy==1.13.3
tenacity==8.5.0
tiktoken==0.7.0
tokenizers==0.20.0
tomlkit==0.12.0
tqdm==4.66.5
typer==0.12.5
typing-inspect==0.9.0
typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.2.3
uvicorn==0.31.0
uvloop==0.20.0
watchfiles==0.24.0
websocket-client==1.8.0
websockets==12.0
wrapt==1.16.0
yarl==1.13.1
zipp==3.20.2
import config
import json
import os

# Cache locations, resolved once from the central config module.
EXCHANGES_FILEPATH = config.get_exchanges_filepath()
TICKERS_DIRECTORY = config.get_tickers_directory()
STOCKS_DIRECTORY = config.get_stocks_directory()
FUSED_DATA_DIRECTORY = config.get_fused_data_directory()
def get_stock_codes(exchange_code: str) -> list[dict]:
    """Load the cached ticker list for `exchange_code`.

    Returns the full list of ticker dicts (each caller indexes item['Code'],
    and the rag module also reads 'Name' and 'Country') — the previous
    `-> list[str]` annotation was wrong.  Prints a message and exits the
    process when the cache file is missing.
    """
    try:
        with open(TICKERS_DIRECTORY + f"{exchange_code}.json", 'r') as file:
            data = json.load(file)
    except FileNotFoundError:
        # Narrowed from a bare `except:` — a malformed JSON file now raises
        # JSONDecodeError instead of being mislabelled "File not found".
        print("File not found")
        exit()
    return data
def get_stock_data(exchange_code: str, stock_code: str):
    """Load the cached historical price data for one stock.

    Returns the parsed JSON from STOCKS_DIRECTORY/<exchange>/<stock>.json.
    Prints a message and exits the process when the cache file is missing.
    """
    try:
        with open(STOCKS_DIRECTORY + f"{exchange_code}/{stock_code}.json", 'r') as file:
            data = json.load(file)
    except FileNotFoundError:
        # Narrowed from a bare `except:` — a malformed JSON file now raises
        # JSONDecodeError instead of being mislabelled "File not found".
        print("File not found")
        exit()
    return data
def get_fused_data(exchange_code: str, stock_code: str):
    """Load the cached fused (ticker + price history) data for one stock.

    Returns the parsed JSON from FUSED_DATA_DIRECTORY/<exchange>/<stock>.json.
    Prints a message and exits the process when the cache file is missing.
    """
    try:
        with open(FUSED_DATA_DIRECTORY + f"{exchange_code}/{stock_code}.json", 'r') as file:
            data = json.load(file)
    except FileNotFoundError:
        # Narrowed from a bare `except:` — a malformed JSON file now raises
        # JSONDecodeError instead of being mislabelled "File not found".
        print("File not found")
        exit()
    return data
def create_fused_data(exchange_code: str):
    """Merge every ticker with its historical price data and cache it.

    Writes one JSON file per stock under FUSED_DATA_DIRECTORY/<exchange>/
    and returns the ticker list with a 'data' key added to every entry.
    """
    stock_codes = get_stock_codes(exchange_code)
    # exist_ok avoids the check-then-create race of the old exists()/makedirs pair.
    os.makedirs(f"{FUSED_DATA_DIRECTORY}{exchange_code}/", exist_ok=True)
    for stock in stock_codes:
        stock['data'] = get_stock_data(exchange_code, stock['Code'])
        with open(f"{FUSED_DATA_DIRECTORY}{exchange_code}/{stock['Code']}.json", 'w') as filename:
            # BUG FIX: the old code dumped the ENTIRE stock_codes list into
            # every per-stock file; each <Code>.json now holds its own stock.
            json.dump(stock, filename)
    return stock_codes
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment