Skip to content
Snippets Groups Projects
Commit 75cc9994 authored by David Matthew Antonio's avatar David Matthew Antonio
Browse files

Initial commit

parents
No related branches found
No related tags found
No related merge requests found
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
secret.py
Stupid_credentials.txt
data/
chroma_langchain_db/
\ No newline at end of file
# Stock and LLM
I have a stooockk, i have an L L M, ugghhhh, penpineappleapplepen
## Requirements
**1. API KEYS**
The project requires API keys for EODHD and OpenAI to work; they are to be stored in a Python file named `secret.py`.
secret.py example:
```python
APIKEYS = {
'OPENAI':'sk-a7388ddffa0bc5fb5a8e4f2128cfaea336e885080fade2f45568ee36770982dd',
'EODHD':'c4134600f900d3d.5544920',
}
```
_Yes these keys are fake_
**2. Dependencies**
You also need some dependencies for **Python 3.12**, they can be installed with the included requirements.txt.
## Running the program
Before running the program, configurations can be made by accessing `config.py`.
You should also run the `initializer.py` file; this will collect data from the EODHD API.
Finally you can go ahead and run app.py to start the gradio app.
## Limitations
The only stock exchange available is the PSE; this is due to API limits.
Only monthly stock values are available; there's too much data if the period is daily.
app.py 0 → 100644
# from langchain_core.prompts import ChatPromptTemplate
# from langchain_openai import ChatOpenAI
# from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
import config
import interface
import initializer
def main():
    """Entry point: optionally refresh the cached API data, then build and
    launch the Gradio web app."""
    if config.INITIALIZE_ON_RUN:
        # Pull fresh exchange/ticker/price data from EODHD before serving.
        initializer.run()
    web_app = interface.initialize()
    web_app.launch(share=config.GRADIO_PUBLIC)


if __name__ == "__main__":
    main()
\ No newline at end of file
# ---------------------------------------------------------------------------
# Local data layout.  Paths are assembled by plain string concatenation, so
# every directory constant MUST keep its trailing '/'.
# ---------------------------------------------------------------------------
BASE_DATA_PATH = 'data/'

TICKERS_DIRECTORY = 'tickers/'
STOCKS_DIRECTORY = 'stocks/'
FUSED_DATA_DIRECTORY = 'fused/'

# File names may include a sub-path and must carry their extension.
EXCHANGES_FILEPATH = 'exchanges.json'

# Behaviour flags and model selection.
INITIALIZE_ON_RUN = True
GRADIO_PUBLIC = True
LLM_MODEL = "gpt-4o-mini"


def get_tickers_directory() -> str:
    """Directory holding the per-exchange ticker list files."""
    return f"{BASE_DATA_PATH}{TICKERS_DIRECTORY}"


def get_stocks_directory() -> str:
    """Directory holding the per-stock historical price files."""
    return f"{BASE_DATA_PATH}{STOCKS_DIRECTORY}"


def get_fused_data_directory() -> str:
    """Directory holding ticker+price fused data files."""
    return f"{BASE_DATA_PATH}{FUSED_DATA_DIRECTORY}"


def get_exchanges_filepath() -> str:
    """Path of the cached exchange-list JSON file."""
    return f"{BASE_DATA_PATH}{EXCHANGES_FILEPATH}"
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
import config
import os
# Resolve the OpenAI API key: prefer the local secret.py, fall back to the
# OPENAI environment variable (e.g. in deployments without the file).
try:
    import secret
    OPENAIKEY = secret.APIKEYS['OPENAI']
except (ImportError, KeyError):
    # Narrowed from a bare `except:` so unrelated bugs (typos, etc.) in
    # secret.py are not silently swallowed.
    OPENAIKEY = os.environ['OPENAI']
# The chat model every query is answered with; the model name is configured
# centrally in config.py (LLM_MODEL).
model = ChatOpenAI(model=config.LLM_MODEL, api_key = OPENAIKEY)

# Two-message prompt: a fixed system briefing describing the assistant's job,
# then a human turn carrying the retrieved context and the user's query —
# {context} and {query} are filled in by send_prompt below.
background_context = ChatPromptTemplate.from_messages([
    ("system", """You are an assistant specialized in stock data. You may receive human prompts asking for the price of a specific stock name or code.
Accompanying the human prompt will be context.
For questions specific about a certain stock coded, the context usually is a json formatted information about the stock, from its name, to the highs and lows in each date. Additionally accompanying the json data is the metadata.
Different questions that are more broad, such as what are the available stocks may return different contexts, such as a list of the stocks.
Should the context given not provide sufficient information, you may admit that you lack the knowledge in your response to the user prompt.
"""),
    ("human", "Context:{context}\n\n###\n\nQuery: {query}")])
def send_prompt(message: str, context: str):
    """Run one user query through the prompt template and the chat model.

    `message` is the raw user question, `context` the stringified retrieval
    results; returns the model's response message.
    """
    pipeline = background_context | model
    payload = {"query": message, "context": context}
    return pipeline.invoke(payload)
from langchain_chroma import Chroma
from chromadb.config import DEFAULT_TENANT, DEFAULT_DATABASE, Settings
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from utility import stock_data_helper
from langchain_text_splitters import RecursiveJsonSplitter
import os
# Resolve the OpenAI API key: prefer the local secret.py, fall back to the
# OPENAI environment variable (e.g. in deployments without the file).
try:
    import secret
    OPENAIKEY = secret.APIKEYS['OPENAI']
except (ImportError, KeyError):
    # Narrowed from a bare `except:` so unrelated bugs (typos, etc.) in
    # secret.py are not silently swallowed.
    OPENAIKEY = os.environ['OPENAI']
def get_vectordb():
    """Build the Chroma vector store holding one document per PSE stock.

    Each ticker becomes one document (metadata header + raw price history),
    plus a single catch-all document listing every ticker so that broad
    questions ("what stocks are available?") retrieve it.

    Returns the populated Chroma vector store.
    """
    embeddings = OpenAIEmbeddings(model="text-embedding-3-large", api_key=OPENAIKEY)
    tickers = stock_data_helper.get_stock_codes("PSE")
    # BUG FIX: the old code also built an unused `stocks` list here, reading
    # every stock file a second time for nothing — removed.
    vector_store = Chroma(
        collection_name="Stocks",
        embedding_function=embeddings,
        # Where to save data locally; remove if persistence is not needed.
        persist_directory="./chroma_langchain_db",
    )
    print('Now Creating Documents')
    documents = []
    for tick in tickers:
        stock_data = stock_data_helper.get_stock_data("PSE", tick['Code'])
        documents.append(Document(
            page_content=f"""
METADATA:
Code: {tick['Code']},
Name: {tick['Name']},
Country: {tick['Country']}
!HISTORICAL DATA:
{str(stock_data)}
""",
            metadata={
                "Code": tick['Code'],
                "Name": tick['Name'],
                "Country": tick['Country'],
            },
        ))
    # Catch-all document for broad "list all stocks" questions.
    # BUG FIX: the old code embedded str(stock_data_helper.get_stock_codes) —
    # the repr of the FUNCTION object — instead of the ticker list itself.
    documents.append(Document(
        page_content=f"""
METADATA:
Name: "General/All stocks/tickers/companies in the philippines",
Country: "Philippines"
!HISTORICAL DATA:
{str(tickers)}
""",
        metadata={
            "Name": "General/All stocks/tickers/companies in the philippines",
        },
    ))
    print('Now adding documents to db')
    vector_store.add_documents(documents=documents)
    return vector_store
import datetime
import json
import os

import requests

import config

# Resolve the EODHD API key: prefer the local secret.py, fall back to the
# EODHD environment variable.
# BUG FIX: the old code also imported `secret` unconditionally before this
# try block, which crashed the whole module whenever secret.py was absent
# and made the environment-variable fallback below unreachable.
try:
    import secret
    EODHD_KEY = secret.APIKEYS['EODHD']
except (ImportError, KeyError):
    EODHD_KEY = os.environ['EODHD']

# Cache locations, resolved once from the central config module.
EXCHANGES_FILEPATH = config.get_exchanges_filepath()
TICKERS_DIRECTORY = config.get_tickers_directory()
STOCKS_DIRECTORY = config.get_stocks_directory()
def search_query(query: str):
    """Search EODHD for stocks matching `query`; returns the parsed JSON.

    On any request/decoding failure the error is printed and the process
    exits — this module treats API access as essential.
    """
    url = f'https://eodhd.com/api/search/{query}?api_token={EODHD_KEY}&fmt=json'
    try:
        # timeout= prevents an unresponsive API from hanging the app forever
        return requests.get(url, timeout=30).json()
    except Exception as e:
        print(e)
        exit()
def retrieve_exchanges():
    """Download the EODHD exchange list, cache it to EXCHANGES_FILEPATH and
    return it.

    On any request/decoding failure the error is printed and the process
    exits (consistent with the other retrieve_* helpers).
    """
    url = f'https://eodhd.com/api/exchanges-list/?api_token={EODHD_KEY}&fmt=json'
    try:
        # timeout= prevents an unresponsive API from hanging the app forever
        exchanges_list = requests.get(url, timeout=30).json()
    except Exception as e:
        print(e)
        exit()
    with open(EXCHANGES_FILEPATH, 'w') as filename:
        json.dump(exchanges_list, filename)
    return exchanges_list
def retrieve_tickers(exchange_code: str):
    """Download every ticker of `exchange_code`, cache the list to
    TICKERS_DIRECTORY/<exchange_code>.json and return it.

    On any request/decoding failure the error is printed and the process
    exits (consistent with the other retrieve_* helpers).
    """
    url = f'https://eodhd.com/api/exchange-symbol-list/{exchange_code}?api_token={EODHD_KEY}&fmt=json'
    try:
        # timeout= prevents an unresponsive API from hanging the app forever
        tickers_list = requests.get(url, timeout=30).json()
    except Exception as e:
        print(e)
        exit()
    # exist_ok avoids the check-then-create race of the old exists()/makedirs pair.
    os.makedirs(TICKERS_DIRECTORY, exist_ok=True)
    with open(TICKERS_DIRECTORY + f'{exchange_code}.json', 'w') as filename:
        json.dump(tickers_list, filename)
    return tickers_list
def retrieve_eod_historical_data(exchange_code: str, stock_code: str, period: int = 1):
    """Download one year of monthly EOD prices for one stock, cache them to
    STOCKS_DIRECTORY/<exchange>/<stock>.json and return the parsed list.

    `period` is kept for backward compatibility but is currently unused —
    the request is hard-coded to monthly data (period=m) to limit volume.
    On any request/decoding failure the error is printed and the process
    exits (consistent with the other retrieve_* helpers).
    """
    current_date = datetime.date.today()
    try:
        earlier_date = current_date.replace(year=current_date.year - 1)
    except ValueError:
        # BUG FIX: Feb 29 has no counterpart in the previous (non-leap)
        # year; the old code raised ValueError whenever run on a leap day.
        earlier_date = current_date.replace(year=current_date.year - 1, day=28)
    url = f'https://eodhd.com/api/eod/{stock_code}.{exchange_code}?from={earlier_date}&to={current_date}&period=m&api_token={EODHD_KEY}&fmt=json'
    try:
        # timeout= prevents an unresponsive API from hanging the app forever
        stock_prices_list = requests.get(url, timeout=30).json()
    except Exception as e:
        print(e)
        exit()
    # exist_ok avoids the check-then-create race of the old exists()/makedirs pair.
    os.makedirs(f"{STOCKS_DIRECTORY}{exchange_code}", exist_ok=True)
    with open(f"{STOCKS_DIRECTORY}{exchange_code}/{stock_code}.json", 'w') as filename:
        json.dump(stock_prices_list, filename)
    return stock_prices_list
\ No newline at end of file
from controllers import stocks
from utility import stock_data_helper
import multiprocessing as mp
def run():
    """Refresh all locally cached EODHD data: exchange list, PSE tickers,
    then one year of monthly prices for every PSE stock (in parallel)."""
    try:
        stocks.retrieve_exchanges()
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed SystemExit
        # and KeyboardInterrupt).  retrieve_exchanges prints the underlying
        # error itself before exiting on API failure.
        print("Cannot retrieve exchanges from the API")
        exit()
    else:
        print("Exchange list updated!")
    # Due to API limits, only Philippine (PSE) stocks are considered.
    try:
        tickers = stocks.retrieve_tickers("PSE")
    except Exception:
        print("Cannot retrieve tickers from the API")
        exit()
    else:
        print("Ticker list updated!")
    # Re-read the list from the cache file just written above.
    tickers = stock_data_helper.get_stock_codes("PSE")
    # One process per ticker — acceptable for the PSE's ticker count.
    # NOTE(review): consider mp.Pool if the exchange list ever grows large.
    processes = [mp.Process(target=stocks.retrieve_eod_historical_data, args=("PSE", i["Code"])) for i in tickers]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    # Fused-data generation is currently disabled; left here for reference.
    # TODO(review): confirm whether create_fused_data should be re-enabled.
    # try:
    #     stock_data_helper.create_fused_data("PSE")
    # except Exception as exception:
    #     print(exception)


if __name__ == "__main__":
    run()
\ No newline at end of file
import gradio as gr
from controllers import rag, prompt

# Built once at import time.  NOTE(review): this calls the OpenAI embeddings
# API and re-reads every cached stock file before the UI even starts —
# confirm this eager initialisation is intended.
vector_db = rag.get_vectordb()
def get_prompt_response(query: str):
    """Answer `query` via RAG: fetch the 10 most similar stock documents
    and pass them as context to the LLM; returns the response text."""
    retrieved_docs = vector_db.similarity_search(query, k=10)
    reply = prompt.send_prompt(query, str(retrieved_docs))
    return reply.content
def initialize():
    """Assemble and return the Gradio Blocks UI (does not launch it)."""
    with gr.Blocks() as app:
        gr.Markdown("# PSE Stock LLM App")
        gr.Markdown("> Only PSE Stocks are supported, theres a limit on the API. Also only monthly data is available, daily would be too much")
        query_box = gr.Textbox(label="Start Chatting:")
        submit_button = gr.Button("Send")
        response_box = gr.Textbox(label="Output", interactive=False)
        # Route the query through the RAG pipeline on every button click.
        submit_button.click(
            fn=get_prompt_response,
            inputs=[query_box],
            outputs=response_box,
        )
    return app
aiofiles==23.2.1
aiohappyeyeballs==2.4.3
aiohttp==3.10.8
aiosignal==1.3.1
annotated-types==0.7.0
anyio==4.6.0
asgiref==3.8.1
attrs==24.2.0
backoff==2.2.1
bcrypt==4.2.0
build==1.2.2
cachetools==5.5.0
certifi==2024.8.30
charset-normalizer==3.3.2
chroma-hnswlib==0.7.6
chromadb==0.5.11
click==8.1.7
coloredlogs==15.0.1
contourpy==1.3.0
cycler==0.12.1
dataclasses-json==0.6.7
Deprecated==1.2.14
distro==1.9.0
durationpy==0.9
fastapi==0.115.0
ffmpy==0.4.0
filelock==3.16.1
flatbuffers==24.3.25
fonttools==4.54.1
frozenlist==1.4.1
fsspec==2024.9.0
google-auth==2.35.0
googleapis-common-protos==1.65.0
gradio==4.44.1
gradio_client==1.3.0
greenlet==3.1.1
grpcio==1.66.2
h11==0.14.0
httpcore==1.0.6
httptools==0.6.1
httpx==0.27.2
huggingface-hub==0.25.1
humanfriendly==10.0
idna==3.10
importlib_metadata==8.4.0
importlib_resources==6.4.5
Jinja2==3.1.4
jiter==0.5.0
jsonpatch==1.33
jsonpointer==3.0.0
kiwisolver==1.4.7
kubernetes==31.0.0
langchain==0.3.1
langchain-chroma==0.1.4
langchain-community==0.3.1
langchain-core==0.3.7
langchain-openai==0.2.1
langchain-text-splitters==0.3.0
langsmith==0.1.129
lark==1.2.2
markdown-it-py==3.0.0
MarkupSafe==2.1.5
marshmallow==3.22.0
matplotlib==3.9.2
mdurl==0.1.2
mmh3==5.0.1
monotonic==1.6
mpmath==1.3.0
multidict==6.1.0
mypy-extensions==1.0.0
numpy==1.26.4
oauthlib==3.2.2
onnxruntime==1.19.2
openai==1.51.0
opentelemetry-api==1.27.0
opentelemetry-exporter-otlp-proto-common==1.27.0
opentelemetry-exporter-otlp-proto-grpc==1.27.0
opentelemetry-instrumentation==0.48b0
opentelemetry-instrumentation-asgi==0.48b0
opentelemetry-instrumentation-fastapi==0.48b0
opentelemetry-proto==1.27.0
opentelemetry-sdk==1.27.0
opentelemetry-semantic-conventions==0.48b0
opentelemetry-util-http==0.48b0
orjson==3.10.7
overrides==7.7.0
packaging==24.1
pandas==2.2.3
pillow==10.4.0
posthog==3.6.6
protobuf==4.25.5
pyasn1==0.6.1
pyasn1_modules==0.4.1
pydantic==2.9.2
pydantic-settings==2.5.2
pydantic_core==2.23.4
pydub==0.25.1
Pygments==2.18.0
pyparsing==3.1.4
PyPika==0.48.9
pyproject_hooks==1.2.0
python-dateutil==2.9.0.post0
python-dotenv==1.0.1
python-multipart==0.0.12
pytz==2024.2
PyYAML==6.0.2
regex==2024.9.11
requests==2.32.3
requests-oauthlib==2.0.0
rich==13.9.1
rsa==4.9
ruff==0.6.8
semantic-version==2.10.0
setuptools==75.1.0
shellingham==1.5.4
six==1.16.0
sniffio==1.3.1
SQLAlchemy==2.0.35
starlette==0.38.6
sympy==1.13.3
tenacity==8.5.0
tiktoken==0.7.0
tokenizers==0.20.0
tomlkit==0.12.0
tqdm==4.66.5
typer==0.12.5
typing-inspect==0.9.0
typing_extensions==4.12.2
tzdata==2024.2
urllib3==2.2.3
uvicorn==0.31.0
uvloop==0.20.0
watchfiles==0.24.0
websocket-client==1.8.0
websockets==12.0
wrapt==1.16.0
yarl==1.13.1
zipp==3.20.2
import config
import json
import os

# Cache locations, resolved once from the central config module.
EXCHANGES_FILEPATH = config.get_exchanges_filepath()
TICKERS_DIRECTORY = config.get_tickers_directory()
STOCKS_DIRECTORY = config.get_stocks_directory()
FUSED_DATA_DIRECTORY = config.get_fused_data_directory()
def get_stock_codes(exchange_code: str) -> list[dict]:
    """Load the cached ticker list for `exchange_code`.

    Returns the full list of ticker dicts (each caller indexes item['Code'],
    and the rag module also reads 'Name' and 'Country') — the previous
    `-> list[str]` annotation was wrong.  Prints a message and exits the
    process when the cache file is missing.
    """
    try:
        with open(TICKERS_DIRECTORY + f"{exchange_code}.json", 'r') as file:
            data = json.load(file)
    except FileNotFoundError:
        # Narrowed from a bare `except:` — a malformed JSON file now raises
        # JSONDecodeError instead of being mislabelled "File not found".
        print("File not found")
        exit()
    return data
def get_stock_data(exchange_code: str, stock_code: str):
    """Load the cached historical price data for one stock.

    Returns the parsed JSON from STOCKS_DIRECTORY/<exchange>/<stock>.json.
    Prints a message and exits the process when the cache file is missing.
    """
    try:
        with open(STOCKS_DIRECTORY + f"{exchange_code}/{stock_code}.json", 'r') as file:
            data = json.load(file)
    except FileNotFoundError:
        # Narrowed from a bare `except:` — a malformed JSON file now raises
        # JSONDecodeError instead of being mislabelled "File not found".
        print("File not found")
        exit()
    return data
def get_fused_data(exchange_code: str, stock_code: str):
    """Load the cached fused (ticker + price history) data for one stock.

    Returns the parsed JSON from FUSED_DATA_DIRECTORY/<exchange>/<stock>.json.
    Prints a message and exits the process when the cache file is missing.
    """
    try:
        with open(FUSED_DATA_DIRECTORY + f"{exchange_code}/{stock_code}.json", 'r') as file:
            data = json.load(file)
    except FileNotFoundError:
        # Narrowed from a bare `except:` — a malformed JSON file now raises
        # JSONDecodeError instead of being mislabelled "File not found".
        print("File not found")
        exit()
    return data
def create_fused_data(exchange_code: str):
    """Merge every ticker with its historical price data and cache it.

    Writes one JSON file per stock under FUSED_DATA_DIRECTORY/<exchange>/
    and returns the ticker list with a 'data' key added to every entry.
    """
    stock_codes = get_stock_codes(exchange_code)
    # exist_ok avoids the check-then-create race of the old exists()/makedirs pair.
    os.makedirs(f"{FUSED_DATA_DIRECTORY}{exchange_code}/", exist_ok=True)
    for stock in stock_codes:
        stock['data'] = get_stock_data(exchange_code, stock['Code'])
        with open(f"{FUSED_DATA_DIRECTORY}{exchange_code}/{stock['Code']}.json", 'w') as filename:
            # BUG FIX: the old code dumped the ENTIRE stock_codes list into
            # every per-stock file; each <Code>.json now holds its own stock.
            json.dump(stock, filename)
    return stock_codes
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment