diff --git a/.gitignore b/.gitignore index 23bb5db..e204fb9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,15 +2,18 @@ /docs/build/ /docs/lib/ /docs/bin/ - # Byte-compiled / optimized / DLL files __pycache__/ -*.py[cod] +*.py[codz] *$py.class +# C extensions +*.so + # folders generated by setup.py build dist +app_uploaded_files/ /logai/data/ /logai/.ipynb_checkpoints/ /results/ @@ -32,10 +35,206 @@ dist /logai/tutorials/datasets/ /.DS_Store /gui/.DS_Store -/.idea +# Distribution / packaging +.venv +venv +.DS_Store +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST -docs/.DS_Store -/docs/_build/ -/.pytest_cache/ -/logai/.idea/ +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. +# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 8287f73..0000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,112 +0,0 @@ - -# Salesforce Open Source Community Code of Conduct - -## About the Code of Conduct - -Equality is a core value at Salesforce. We believe a diverse and inclusive -community fosters innovation and creativity, and are committed to building a -culture where everyone feels included. - -Salesforce open-source projects are committed to providing a friendly, safe, and -welcoming environment for all, regardless of gender identity and expression, -sexual orientation, disability, physical appearance, body size, ethnicity, nationality, -race, age, religion, level of experience, education, socioeconomic status, or -other similar personal characteristics. - -The goal of this code of conduct is to specify a baseline standard of behavior so -that people with different social values and communication styles can work -together effectively, productively, and respectfully in our open source community. -It also establishes a mechanism for reporting issues and resolving conflicts. - -All questions and reports of abusive, harassing, or otherwise unacceptable behavior -in a Salesforce open-source project may be reported by contacting the Salesforce -Open Source Conduct Committee at ossconduct@salesforce.com. 
- -## Our Pledge - -In the interest of fostering an open and welcoming environment, we as -contributors and maintainers pledge to making participation in our project and -our community a harassment-free experience for everyone, regardless of gender -identity and expression, sexual orientation, disability, physical appearance, -body size, ethnicity, nationality, race, age, religion, level of experience, education, -socioeconomic status, or other similar personal characteristics. - -## Our Standards - -Examples of behavior that contributes to creating a positive environment -include: - -* Using welcoming and inclusive language -* Being respectful of differing viewpoints and experiences -* Gracefully accepting constructive criticism -* Focusing on what is best for the community -* Showing empathy toward other community members - -Examples of unacceptable behavior by participants include: - -* The use of sexualized language or imagery and unwelcome sexual attention or -advances -* Personal attacks, insulting/derogatory comments, or trolling -* Public or private harassment -* Publishing, or threatening to publish, others' private information—such as -a physical or electronic address—without explicit permission -* Other conduct which could reasonably be considered inappropriate in a -professional setting -* Advocating for or encouraging any of the above behaviors - -## Our Responsibilities - -Project maintainers are responsible for clarifying the standards of acceptable -behavior and are expected to take appropriate and fair corrective action in -response to any instances of unacceptable behavior. - -Project maintainers have the right and responsibility to remove, edit, or -reject comments, commits, code, wiki edits, issues, and other contributions -that are not aligned with this Code of Conduct, or to ban temporarily or -permanently any contributor for other behaviors that they deem inappropriate, -threatening, offensive, or harmful. - -## Scope - -This Code of Conduct applies both within project spaces and in public spaces -when an individual is representing the project or its community. Examples of -representing a project or community include using an official project email -address, posting via an official social media account, or acting as an appointed -representative at an online or offline event. Representation of a project may be -further defined and clarified by project maintainers. - -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported by contacting the Salesforce Open Source Conduct Committee -at ossconduct@salesforce.com. All complaints will be reviewed and investigated -and will result in a response that is deemed necessary and appropriate to the -circumstances. The committee is obligated to maintain confidentiality with -regard to the reporter of an incident. Further details of specific enforcement -policies may be posted separately. - -Project maintainers who do not follow or enforce the Code of Conduct in good -faith may face temporary or permanent repercussions as determined by other -members of the project's leadership and the Salesforce Open Source Conduct -Committee. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][contributor-covenant-home], -version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html. 
-It includes adaptions and additions from [Go Community Code of Conduct][golang-coc], -[CNCF Code of Conduct][cncf-coc], and [Microsoft Open Source Code of Conduct][microsoft-coc]. - -This Code of Conduct is licensed under the [Creative Commons Attribution 3.0 License][cc-by-3-us]. - -[contributor-covenant-home]: https://www.contributor-covenant.org (https://www.contributor-covenant.org/) -[golang-coc]: https://golang.org/conduct -[cncf-coc]: https://github.com/cncf/foundation/blob/master/code-of-conduct.md -[microsoft-coc]: https://opensource.microsoft.com/codeofconduct/ -[cc-by-3-us]: https://creativecommons.org/licenses/by/3.0/us/ \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..4bb780d --- /dev/null +++ b/LICENSE @@ -0,0 +1,28 @@ +BSD 3-Clause License + +Copyright (c) 2025, Arumugam + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in index 1f5df01..64ad321 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,6 +1 @@ -recursive-include logai *.yaml *.json - -recursive-exclude logai/results * -recursive-exclude logai/data/ * - -include requirements.txt +include README.md LICENSE diff --git a/README.md b/README.md index 5eced54..e6415a1 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,6 @@ -


@@ -12,295 +8,76 @@ For full license text, see the LICENSE file in the repo root or https://opensour

- [removed README header badge block: Latest Release, pages-build-deployment, PyPI version, license]
- -# LogAI: A Library for Log Analytics and Intelligence - ## Table of Contents -* [Introduction](#introduction) -* [Installation](#installation) -* [Getting Started](#getting-started) - * [Explore LogAI GUI Portal](#explore-logai-gui-portal) - * [Run Simple Time-series Anomaly Detection Application](#run-simple-time-series-anomaly-detection-application) - * [Build Customized LogAI Applications](#build-customized-logai-applications) - * [Deep-learning Anomaly Detection Benchmarking](#deep-learning-anomaly-detection-benchmarking) -* [Documentation](#documentation) -* [Technical Report and Citing LogAI](#technical-report-and-citing-logai) -* [Contact](#contact) -* [License](#license) +- [RDK\_One3B](#rdk_one3b) + - [Table of Contents](#table-of-contents) + - [Introduction](#introduction) +- [Getting Started](#getting-started) + - [Installation](#installation) + - [Explore RDK\_One3B GUI Portal](#explore-rdk_one3b-gui-portal) + - [Log Summarization](#log-summarization) + - [Log Parsing](#log-parsing) + - [Log Clustering](#log-clustering) + - [Log Anomaly Detection](#log-anomaly-detection) + - [Log Report with LLaMa](#log-report-with-llama) + - [Reference](#reference) + - [License](#license) ## Introduction -LogAI is a one-stop open source library for log analytics and intelligence. LogAI supports various log analytics and log intelligence tasks such as log summarization, log clustering, log anomaly detection and more. It adopts the OpenTelemetry data model, to enable compatibility with different log management platforms. LogAI provides a unified model interface and integrates popular time-series, statistical learning and deep -learning models. Alongside this, LogAI also provides an out-of-the-box GUI toolkit for users to conduct interactive log -analysis. With LogAI, we can also easily benchmark popular ML and deep-learning algorithms for log anomaly detection -without putting in redundant effort to process the logs. We have opensourced LogAI to facilitate a wide range of -applications benefiting both academic research and industrial prototyping. - -The following table compares LogAI with several existing log analysis Tools, include both commercial log management platforms -like NewRelic and DataDog, or popular log analysis open source tools on Github. 
- -| Coverage | LogAI | NewRelic Log Monitoring | DataDog Log Explorer | logparser | loglizer | deep-loglizer | log3C | -| ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | -| OpenTelemetry log data model | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | | | | -| Unified data loader and preprocessing | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | :white_check_mark: | | -| Auto log parsing | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | -| Log clustering | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | | :white_check_mark: | -| Log anomaly detection - time-series | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | | | | -| Log anomaly detection - traditional ML | :white_check_mark: | | | | :white_check_mark: | | | -| Log anomaly detection - deep Learning | :white_check_mark: | | | | :white_check_mark: | :white_check_mark: | | -| Huggingface integration | :white_check_mark: | | | | | | | -| GUI for result visualization | :white_check_mark: | :white_check_mark: | :white_check_mark: | | | | | +# Getting Started ## Installation -### Quick Install -You can install LogAI core library using `pip install`: - ```shell -# Check out LogAI code repo from Github -git clone https://github.com/salesforce/logai.git -cd logai +# Check out rdk_one3b code repo from Github +git clone https://git.yo-digital.com/hackfest6/rdk-one3b.git +cd rdk-one3b # [Optional] Create virtual environment python3 -m venv venv source venv/bin/activate -# Install LogAI -pip install logai - -``` - -### Install Optional Dependencies -LogAI core library is light-weight with limited dependent packages installed. Users can install optional dependencies -to enable extended functionalities of LogAI. - -**Deep Learning Log Analysis**. To conduct deep learning model related tasks and run benchmarking, -please install extra requirements by `pip install "logai[deep-learning]"`. - -**Enable LogAI GUI Portal***. To use LogAI GUI portal, -please install extra requirements by `pip install "logai[gui]"`. - -**LogAI Development**. To contribute to LogAI development, build and test code changes, -please install extra requirements by `pip install "logai[dev]"`. - -**Complete Installation**. you can install the full list of dependencies by `pip install "logai[all]"`. - -### Known Issues - -> :warning: You may see `Resource punkt not found` while using LogAI. You can download `punkt` -> package from NLTK to solve the problem. -> ```shell -> python -m nltk.downloader punkt -> ``` - -## Getting Started - -Below we briefly introduce several ways to explore and use LogAI, including exploring LogAI GUI -portal, benchmarking deep-learning based log anomaly detection using LogAI, and building your -own log analysis application with LogAI. +# install dependencies +pip install -r requirements.txt +pip install nltk +python -m nltk.downloader punkt_tab -### Explore LogAI GUI Portal +# make sure to add current root to PYTHONPATH +export PYTHONPATH='.' +python3 logai/gui/application.py # Run local plotly dash server. -You can also start a local LogAI service and use the GUI portal to explore LogAI. 
- -```shell - -# Check out LogAI code repo from Github -git clone https://github.com/salesforce/logai.git -cd logai - -# [Optional] Create virtual environment -python3 -m venv venv # create virtual environment -source venv/bin/activate # activate virtual env - -# install LogAI and GUI dependencies -pip install ".[dev]" -pip install ".[gui]" - -# Start LogAI service -export PYTHONPATH='.' # make sure to add current root to PYTHONPATH -python3 gui/application.py # Run local plotly dash server. ``` -Then open the LogAI portal via http://localhost:8050/ or http://127.0.0.1:8050/ in your browser: - -![portal](img/logai_gui_landing.png) - -The control panel is on the left side of the page. There are three applications you can choose from: -Log Summarization, Log Clustering and Anomaly Detection. - -#### Control Panel - -**File Setting**. You can select the log type and log files to be processed. Now LogAI supports three -public datasets: HDFS, BGL and HealthApp. For each log type we included several sample log data. After -log file selected, you can choose the attributes you want to be involved in log processing. The selected -attributes will be treated as structured log attributes. - -**Algorithm Setting**. For different applications, the algorithms options may be different. -For example, auto-log parsing algorithms are utilized for log summarization, but log clustering uses auto-parsing algorithms, - vectorization algorithms, categorical encoding and clustering algorithms. You can select an - algorithm and change the parameters in each algorithm section. After algorithm configuration, simply click "run" to -run the application. - -#### Log Summarization - -Log summarization App summarize and group the raw logs by log patterns and attributes. You can click on -each log pattern and see what the pattern looks like and the dynamic values in each position. You can also -see the chart of occurrance trend of this pattern on the right side. - -![log summarization](img/logai_summarization_res.png) +Then open the RDK_One3B App via http://localhost:8050/ or http://127.0.0.1:8050/ in your browser -#### Log Clustering +## Explore RDK_One3B GUI Portal -Log clustering App groups raw logs into clusters by calculating the semantic representation of each logline. -Then using clustering algorithms to generate log clusters. In this example, we choose k-mean where `k==8` to -generate 8 clusters. The result is shown as a pie chart and you can click each portion of the pie chart to check -the raw logs in this cluster. +### Log Summarization +We use the various values obtained through telemetry, such as `SSID.Status`, `Radio.Status`, etc., to summarise what state the device has been in the last few hours as per the logs obtained. +![landing_page](img/RDK_One3B_LandingPage.png) -![log clustering](img/logai_clustering_res.png) +### Log Parsing +**Drain3** uses a tree-based algorithm to extract structured **log templates** from raw log messages. This reduces high-cardinality logs into generalized patterns. After parsing, the logs are transformed into a feature-rich format using template frequency, token-level statistics, temporal features and metadata (e.g., source, severity, etc.). This prepares the data for classification and clustering algorithms. -#### Anomaly Detection +We parse the various logs present under rdklogs with the help of drain3 to see what logs have been occurring the most frequently in the last few hours on the device. It is done on a single file basis here. 
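The snippet below is a minimal sketch of this Drain3 parsing step, assuming the `drain3` PyPI package is installed; the log file path and the timestamp-stripping regex are illustrative placeholders, not the exact preprocessing shipped in this repository.

```python
# Minimal Drain3 template-mining sketch (assumes: pip install drain3).
# The rdklogs file path and the timestamp regex below are illustrative assumptions.
import re
from collections import Counter

from drain3 import TemplateMiner

template_miner = TemplateMiner()  # default in-memory Drain configuration
template_counts = Counter()

with open("rdklogs/logs/WiFilog.txt.0", errors="ignore") as f:  # hypothetical path
    for raw_line in f:
        # Drop a leading "YYYY-MM-DDTHH:MM:SS" timestamp so it does not split templates.
        message = re.sub(r"^\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}\s*", "", raw_line.strip())
        if not message:
            continue
        result = template_miner.add_log_message(message)
        template_counts[result["template_mined"]] += 1

# Most frequent log templates seen in the file, i.e. what the parsing view surfaces.
for template, count in template_counts.most_common(10):
    print(f"{count:6d}  {template}")
```

In the app itself, the Drain parameters (for example `sim_th` and `depth`) come from the per-log JSON files under `configs/`, introduced later in this diff.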
+![landing_page](img/RDK_One3B_LogParsing.png) -Log anomaly detection App conduct log anomaly detection tasks. Similar to log clustering, log anomaly detection -also needs to extract information from raw logs and generate representation of loglines. Depend on the type of anomaly detection, -The representation can be different. -**Time-series anomaly detection**. If we use time-series algorithm like ETS, the raw logs will be converted -into log counter vectors by given time interval. Then ETS is performed on the generated log counter vectors and detect -anomalous timestamps on the counter vector time-series. - -**Semantic anomaly detection**. If we use unsupervised outlier detection algorithms such as One-class SVM, the raw logs will -be converted into semantic vectors and feed the One-class SVM model. Then the model will detect anomalous loglines. - -![log anomaly detection](img/logai_anomaly_detection.png) - -LogAI GUI portal is just an example to demo LogAI capabilities. We know this may not be the best way to visualize the -results and there might be bugs in how the results are displayed. We will keep working with the open source community -to improve usability of the portal. Any feedbacks and contributions are welcome :blush:. - -### Run Simple Time-series Anomaly Detection Application - -You can also use LogAI in more programtic ways. LogAI supports configuration files in `.json` or `.yaml`. -Below is a sample `log_anomaly_detection_config.json` configuration for anomaly detection application. -Make sure to set `filepath` to the target log dataset file path. - -```json -{ - "open_set_data_loader_config": { - "dataset_name": "HDFS", - "filepath": "" - }, - "preprocessor_config": { - "custom_delimiters_regex":[] - }, - "log_parser_config": { - "parsing_algorithm": "drain", - "parsing_algo_params": { - "sim_th": 0.5, - "depth": 5 - } - }, - "feature_extractor_config": { - "group_by_category": ["Level"], - "group_by_time": "1s" - }, - "log_vectorizer_config": { - "algo_name": "word2vec" - }, - "categorical_encoder_config": { - "name": "label_encoder" - }, - "anomaly_detection_config": { - "algo_name": "one_class_svm" - } - } -``` - -Then to run log anomaly detection. You can simply create below python script: - -```python -import json - -from logai.applications.application_interfaces import WorkFlowConfig -from logai.applications.log_anomaly_detection import LogAnomalyDetection - -# path to json configuration file -json_config = "./log_anomaly_detection_config.json" - -# Create log anomaly detection application workflow configuration -config = json.loads(json_config) -workflow_config = WorkFlowConfig.from_dict(config) - -# Create LogAnomalyDetection Application for given workflow_config -app = LogAnomalyDetection(workflow_config) - -# Execute App -app.execute() - -``` - -Then you can check anomaly detection results by calling `app.anomaly_results`. - -For full context of this example please check -[Tutorial: Use Log Anomaly Detection Application](./examples/jupyter_notebook/log_anomaly_detection_example.ipynb). - -### Build Customized LogAI Applications -You can build your own customized log analysis applications using LogAI. 
Here we show two examples: - -* [Tutorial: Log Clustering Using LogAI](./examples/jupyter_notebook/tutorial_log_clustering.ipynb) -* [Tutorial: Log Anomaly Detection Using LogAI](./examples/jupyter_notebook/tutorial_log_anomaly_detection.ipynb) - -### Deep-learning Anomaly Detection Benchmarking - -LogAI can be used to benchmark deep-learning anomaly detection results. -A [tutorial](examples/jupyter_notebook/tutorial_deep_ad.md) is provided for -Anomaly Detection Benchmarking using LSTM anomaly detector for HDFS Dataset. More examples of deep-learning anomaly -detection benchmarking on different datasets and algorithms can be found in -[Deep Anomaly Detection Benchmarking Examples](examples/jupyter_notebook/nn_ad_benchmarking). - -## Documentation - -For more detail about LogAI library and advanced use cases, please visit -[LogAI Documentation](https://opensource.salesforce.com/logai). +### Log Clustering +**TensorFlow Decision Forests** (TF-DF) is used to train a supervised model on labeled logs. It's a scalable, interpretable tree-based model ideal for classifying logs into categories like `INFO`, `ERROR`, `SECURITY_ALERT` and detecting known failure types or operational events + +We form clusters for the various logs present under rdklogs with the help of drain3 to see what log patterns have been occurring the most frequently in the last few hours on the device. It is done on both a single file and multi file basis here. +![landing_page](img/RDK_One3B_LogClustering.png) -## Technical Report and Citing LogAI +### Log Anomaly Detection +After parsing, the logs are transformed into a feature-rich format using template frequency, token-level statistics, temporal features and metadata (e.g., source, severity, etc.). This prepares the data for classification and clustering algorithms. -You can find more details about LogAI in the [technical report](https://arxiv.org/abs/2301.13415). -If you're using LogAI in your research or applications, please cite using this BibTeX: +![landing_page](img/RDK_One3B_LogSummarization.png) -``` -@misc{https://doi.org/10.48550/arxiv.2301.13415, - title = {LogAI: A Library for Log Analytics and Intelligence}, - author = {Cheng, Qian and Saha, Amrita and Yang, Wenzhuo and Liu, Chenghao and Sahoo, Doyen and Hoi, Steven}, - publisher = {arXiv}, - year = {2023}, - doi = {10.48550/ARXIV.2301.13415}, - url = {https://arxiv.org/abs/2301.13415}, - copyright = {arXiv.org perpetual, non-exclusive license} -} - -``` - -## Contact -If you have any questions, comments or suggestions, -please do not hesitate to contact us at [logai@salesforce.com](logai@salesforce.com). +### Log Report with LLaMa +Using LLaMA (a large language model), the pipeline generates natural language **summaries** of anomaly clusters, **explanations** of complex or unknown logs and AI-powered insights for DevOps or security teams +## Reference + [Salesforce LogAI](https://github.com/salesforce/logai) A Library for Log Analytics and Intelligence ## License [BSD 3-Clause License](LICENSE.txt) - diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index 8557797..0000000 --- a/SECURITY.md +++ /dev/null @@ -1,14 +0,0 @@ - -## Security - -Please report any security issue to [security@salesforce.com](mailto:security@salesforce.com) -as soon as it is discovered. 
This library limits its runtime dependencies in -order to reduce the total cost of ownership as much as can be, but all consumers -should remain vigilant and have their security stakeholders review all third-party -products (3PP) like this one and their dependencies. \ No newline at end of file diff --git a/column_list.txt b/column_list.txt new file mode 100644 index 0000000..e1ac034 --- /dev/null +++ b/column_list.txt @@ -0,0 +1,7 @@ +Report.Time +Report.mac +Report.Device.WiFi.SSID.1.Status +Report.Device.WiFi.Radio.1.Status +Report.Device.WiFi.SSID.2.Status +Report.Device.WiFi.Radio.2.Status + diff --git a/configs/__init__.py b/configs/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/configs/airties.json b/configs/airties.json new file mode 100644 index 0000000..5ee7199 --- /dev/null +++ b/configs/airties.json @@ -0,0 +1,39 @@ +{ + "_comment":"2025-06-03 15:21:20-[00352190]-[con]multiap-controller[20554]:[ctrl][tlv_parser] map_parse_assoc_wifi6_sta_status_tlv: sta[d2:4a:89:04:00:00] not found", + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%d %H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Module", "Submodule"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":["-", "[", "]"], + "custom_replace_list": {} + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} \ No newline at end of file diff --git a/configs/boottime.json b/configs/boottime.json new file mode 100644 index 0000000..e69de29 diff --git a/configs/ccspcujoagent_log.json b/configs/ccspcujoagent_log.json new file mode 100644 index 0000000..ec94e9a --- /dev/null +++ b/configs/ccspcujoagent_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "CCSPCUJOAGENT\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/cellularmanager_log.json b/configs/cellularmanager_log.json new file mode 100644 index 0000000..f40f326 --- /dev/null +++ b/configs/cellularmanager_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", 
+ "CELLULARMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/config_list.json b/configs/config_list.json new file mode 100644 index 0000000..797c289 --- /dev/null +++ b/configs/config_list.json @@ -0,0 +1,184 @@ +{ + "supported_files": [ + { + "name": "Airties", + "supported_config": "airties.json", + "supported_files": ["ArmConsolelog"] + }, + { + "name": "ArmConsole", + "supported_config": "console_log.json", + "supported_files": ["ArmConsolelog"] + }, + { + "name": "BootTime", + "supported_config": "boottime.json", + "supported_files": ["BootTime"] + }, + { + "name": "LMLite", + "supported_config": "lmlite.json", + "supported_files": ["LM"] + }, + { + "name": "PandM_Log", + "supported_config": "pandm_log.json", + "supported_files": ["PAMlog"] + }, + { + "name": "PSM_log", + "supported_config": "psm_log.json", + "supported_files": ["PSMlog"] + }, + { + "name": "WEBPA", + "supported_config": "webpa.json", + "supported_files": ["WEBPAlog"] + }, + { + "name": "WiFi_Log", + "supported_config": "wifi_log.json", + "supported_files": ["WiFiLog"] + }, + { + "name": "Telemetry_2", + "supported_config": "telemetry2.json", + "supported_files": ["telemetry2_0"] + }, + { + "name": "TelcoVoiceManager", + "supported_config": "telco_voice_mgr.json", + "supported_files": ["TELCOVOICEMANAGERLog"] + }, + { + "name": "StateInfoManager", + "supported_config": "state_info_manager.json", + "supported_files": ["STATEINFOMANAGERLog"] + }, + { + "name": "PARODUS", + "supported_config": "parodus.json", + "supported_files": ["PARODUSlog"] + }, + { + "name": "Syslog_NG", + "supported_config": "syslog_ng.json", + "supported_files": ["syslog"] + }, + { + "name": "CUJOAgent", + "supported_config": "cujo_agent.json", + "supported_files": ["CUJOAGENT"] + }, + { + "name": "Messages", + "supported_config": "console_log.json", + "supported_files": ["messages"] + }, + { + "name": "VLANManagerLog", + "supported_config": "vlanmanager_log.json", + "supported_files": ["VLANManagerLog"] + }, + { + "name": "WEBPAlog", + "supported_config": "webpa_log.json", + "supported_files": ["WEBPAlog"] + }, + { + "name": "CCSPCUJOAGENTlog", + "supported_config": "ccspcujoagent_log.json", + "supported_files": ["CCSPCUJOAGENTlog"] + }, + { + "name": "CELLULARMANAGERlog", + "supported_config": "cellularmanager_log.json", + "supported_files": ["CELLULARMANAGERLog"] + }, + { + "name": "EnrolmentAgentlog", + "supported_config": "enrollmentagent_log.json", + "supported_files": ["EnrolmentAgentLog"] + }, + { + "name": "ETHAGENTlog", + "supported_config": "ethagent_log.json", + "supported_files": ["ETHAGENTLog"] + }, + { + "name": "GponManagerlog", + "supported_config": "gponmanager_log.json", + "supported_files": ["GponManagerLog"] + }, + { + "name": "IPv6Privacylog", + "supported_config": "ipv6privacy_log.json", + "supported_files": ["IPV6PrivacyLog"] + }, + { + "name": "NOTIFYlog", + "supported_config": "notify_log.json", + "supported_files": ["NOTIFYLog"] + }, + { + "name": "PPPManagerlog", + "supported_config": "pppmanager_log.json", + "supported_files": ["PPPManagerLog"] + }, + { + "name": "RTMessagelog", 
+ "supported_config": "rtmessage_log.json", + "supported_files": ["RTMessagelog"] + }, + { + "name": "SmartHomeAgentLog", + "supported_config": "smarthomeagent_log.json", + "supported_files": ["SmartHomeAgentLog"] + }, + { + "name": "TDMlog", + "supported_config": "tdm_log.json", + "supported_files": ["TDMlog"] + }, + { + "name": "TELCOVOICEMANAGERLog", + "supported_config": "telecovoice_log.json", + "supported_files": ["TELCOVOICEMANAGERLog"] + }, + { + "name": "VLANManagerLog", + "supported_config": "vlanmanager_log.json", + "supported_files": ["VLANManagerLog"] + }, + { + "name": "VPNManagerLog", + "supported_config": "vpnmanager_log.json", + "supported_files": ["VPNManagerLog"] + }, + { + "name": "WANMANAGERLog", + "supported_config": "wanmanager_log.json", + "supported_files": ["WANMANAGERLog"] + }, + { + "name": "WEBPAlog", + "supported_config": "webpa_log.json", + "supported_files": ["WEBPAlog"] + }, + { + "name": "xconf", + "supported_config": "xconf_log.json", + "supported_files": ["xconf"] + }, + { + "name": "XDSLMANAGERLog", + "supported_config": "xdslmanager_log.json", + "supported_files": ["XDSLMANAGERLog"] + }, + { + "name": "VoIPlog", + "supported_config": "voip_log.json", + "supported_files": ["VoIPlog"] + } + ] +} diff --git a/configs/console_log.json b/configs/console_log.json new file mode 100644 index 0000000..e69de29 diff --git a/configs/cujo_agent.json b/configs/cujo_agent.json new file mode 100644 index 0000000..a2d7ac1 --- /dev/null +++ b/configs/cujo_agent.json @@ -0,0 +1,40 @@ + +{ + "_commnet": "2025-06-03T15:21:41 telekom: CUJOAGENT [tracer] INFO trace [timer=tracer cpu%=2.6444200866088559 vsize_kb=146988 rss_kb=22128 lua_mem_pre_gc_kb=1095.74609375 lua_mem_kb=1077.017578125 lua_refs=53 uptime_s=74700]", + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Module", "Level1", "Level2"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":["="], + "custom_replace_list": [] + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} \ No newline at end of file diff --git a/configs/enrollmentagent_log.json b/configs/enrollmentagent_log.json new file mode 100644 index 0000000..260f443 --- /dev/null +++ b/configs/enrollmentagent_log.json @@ -0,0 +1,43 @@ +{ + + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "ENROLMENTAGENT\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + 
"anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/ethagent_log.json b/configs/ethagent_log.json new file mode 100644 index 0000000..830a50d --- /dev/null +++ b/configs/ethagent_log.json @@ -0,0 +1,43 @@ +{ + + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "ETHAGENT\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/gponmanager_log.json b/configs/gponmanager_log.json new file mode 100644 index 0000000..4783041 --- /dev/null +++ b/configs/gponmanager_log.json @@ -0,0 +1,43 @@ +{ + + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "GPONMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/ipv6privacy_log.json b/configs/ipv6privacy_log.json new file mode 100644 index 0000000..0203908 --- /dev/null +++ b/configs/ipv6privacy_log.json @@ -0,0 +1,43 @@ +{ + + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "IPV6PRIVACY\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/lmlite.json b/configs/lmlite.json new file mode 100644 index 0000000..823600c --- /dev/null +++ b/configs/lmlite.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + 
"attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "LM\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/notify_log.json b/configs/notify_log.json new file mode 100644 index 0000000..0c5a1d7 --- /dev/null +++ b/configs/notify_log.json @@ -0,0 +1,34 @@ +{ + + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "NOTFIY\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + } +} diff --git a/configs/pandm_log.json b/configs/pandm_log.json new file mode 100644 index 0000000..8b0acf7 --- /dev/null +++ b/configs/pandm_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "PAM\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/parodus.json b/configs/parodus.json new file mode 100644 index 0000000..18cc82f --- /dev/null +++ b/configs/parodus.json @@ -0,0 +1,43 @@ +{ + "_comment":"2025-06-03T15:21:49 speedport: PARODUS.INFO [tid=12672] PARODUS: Ping received with payload mac:34194dc2f81f, opcode 9", + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "PARODUS\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + 
"anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} \ No newline at end of file diff --git a/configs/pppmanager_log.json b/configs/pppmanager_log.json new file mode 100644 index 0000000..a485039 --- /dev/null +++ b/configs/pppmanager_log.json @@ -0,0 +1,43 @@ +{ + + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "PPPMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/psm_log.json b/configs/psm_log.json new file mode 100644 index 0000000..cfe8de1 --- /dev/null +++ b/configs/psm_log.json @@ -0,0 +1,43 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "PSM\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + , + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/rtmessage_log.json b/configs/rtmessage_log.json new file mode 100644 index 0000000..8f5c0ed --- /dev/null +++ b/configs/rtmessage_log.json @@ -0,0 +1,43 @@ +{ + + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "RTMESSAGE\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/smarthomeagent_log.json b/configs/smarthomeagent_log.json new file mode 100644 index 0000000..353e151 --- /dev/null +++ b/configs/smarthomeagent_log.json @@ -0,0 +1,43 @@ +{ + + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": 
["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "CCSPSMARTHOMEAGENT\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/state_info_manager.json b/configs/state_info_manager.json new file mode 100644 index 0000000..9e3e402 --- /dev/null +++ b/configs/state_info_manager.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "STATEINFOMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/syslog_ng.json b/configs/syslog_ng.json new file mode 100644 index 0000000..7ee32be --- /dev/null +++ b/configs/syslog_ng.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} + diff --git a/configs/tdm_log.json b/configs/tdm_log.json new file mode 100644 index 0000000..cc2200d --- /dev/null +++ b/configs/tdm_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "TDM\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + 
"log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/telco_voice_mgr.json b/configs/telco_voice_mgr.json new file mode 100644 index 0000000..413aa87 --- /dev/null +++ b/configs/telco_voice_mgr.json @@ -0,0 +1,43 @@ +{ + "_comment":"2025-06-03T15:30:43 speedport: TELCOVOICEMANAGER.INFO [tid=2672] get_event_param:1144 Event name = voiceServiceUpdateObject", + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "TELCOVOICEMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} \ No newline at end of file diff --git a/configs/telecovoice_log.json b/configs/telecovoice_log.json new file mode 100644 index 0000000..a0b35ef --- /dev/null +++ b/configs/telecovoice_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "TELECOVOICEMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/telemetry2.json b/configs/telemetry2.json new file mode 100644 index 0000000..89a3692 --- /dev/null +++ b/configs/telemetry2.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "T2\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} \ No newline at end of file diff --git a/configs/test.json b/configs/test.json new file 
mode 100644 index 0000000..525d2a9 --- /dev/null +++ b/configs/test.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "XDSLMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} \ No newline at end of file diff --git a/configs/version.json b/configs/version.json new file mode 100644 index 0000000..81f1d80 --- /dev/null +++ b/configs/version.json @@ -0,0 +1,39 @@ +{ + "_comment": "MACHINE_NAME=HGW01A-ARC imagename:HGW01A-ARC_v004.011.070#2025:05:16 BRANCH=rdkb-2022q3-dunfell-dt VERSION=004.011.070 SPIN=070 BUILD-TIME=\"2025-05-16 14:34:44\" WORKFLOW-TYPE=REL-RDK BUILD-ID=43962133 Generated on Fri May 16 14:34:44 UTC 2025 SDK_VERSION=ecnt--sdk-7.3.283.3100_v187", + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%d %H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Module", "Submodule"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":["-"], + "custom_replace_list": {} + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} \ No newline at end of file diff --git a/configs/vlanmanager_log.json b/configs/vlanmanager_log.json new file mode 100644 index 0000000..84fc93d --- /dev/null +++ b/configs/vlanmanager_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "VLANMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/voip_log.json b/configs/voip_log.json new file mode 100644 index 0000000..d3fc7c7 --- /dev/null +++ b/configs/voip_log.json @@ -0,0 +1,41 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + 
"log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "VOIP\\.([\\w]+)": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/vpnmanager_log.json b/configs/vpnmanager_log.json new file mode 100644 index 0000000..963ec1b --- /dev/null +++ b/configs/vpnmanager_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "VPNMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/wanmanager_log.json b/configs/wanmanager_log.json new file mode 100644 index 0000000..7511896 --- /dev/null +++ b/configs/wanmanager_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "WANMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/webpa.json b/configs/webpa.json new file mode 100644 index 0000000..ea40e67 --- /dev/null +++ b/configs/webpa.json @@ -0,0 +1,43 @@ +{ + "_comment":"2025-06-03T18:03:12 speedport: WEBPA.INFO [tid=12808] WEBPA: Received data ParamName X_RDKCENTRAL-COM_Connected-Client,data : Connected-Client,Other,40:61:86:e9:ce:51,Connected,PCBuero", + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "WEBPA\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } 
+ }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} \ No newline at end of file diff --git a/configs/webpa_log.json b/configs/webpa_log.json new file mode 100644 index 0000000..5f1090f --- /dev/null +++ b/configs/webpa_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "WEBPA\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/wifi_log.json b/configs/wifi_log.json new file mode 100644 index 0000000..14c495f --- /dev/null +++ b/configs/wifi_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "WIFI\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} \ No newline at end of file diff --git a/configs/xDSL_mgr.json b/configs/xDSL_mgr.json new file mode 100644 index 0000000..525d2a9 --- /dev/null +++ b/configs/xDSL_mgr.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "XDSLMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} \ No newline at end of file diff 
--git a/configs/xconf_log.json b/configs/xconf_log.json new file mode 100644 index 0000000..559e061 --- /dev/null +++ b/configs/xconf_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "XCONF\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/configs/xdslmanager_log.json b/configs/xdslmanager_log.json new file mode 100644 index 0000000..9eda4e0 --- /dev/null +++ b/configs/xdslmanager_log.json @@ -0,0 +1,42 @@ +{ + "data_loader_config": { + "infer_datetime": true, + "datetime_format": "%Y-%m-%dT%H:%M:%S", + "reader_args": { + "log_format": " " + }, + "dimensions": { + "timestamp": ["DateTime"], + "body": ["Content"], + "attributes": ["Level"] + } + }, + "preprocessor_config": { + "custom_delimiters_regex":[], + "custom_replace_list": { + "^([\\d\\-\\d\\-\\w:\\d:\\d]+)": "", + "XDSLMANAGER\\.([\\w]+)": "", + "tid=\\d+\\]\\s*(.*)$": "" + } + }, + "log_parser_config": { + "parsing_algorithm": "drain", + "parsing_algo_params": { + "sim_th": 0.5, + "depth": 5 + } + }, + "feature_extractor_config": { + "group_by_category": ["Level"], + "group_by_time": "1min" + }, + "log_vectorizer_config": { + "algo_name": "word2vec" + }, + "categorical_encoder_config": { + "name": "label_encoder" + }, + "anomaly_detection_config": { + "algo_name": "one_class_svm" + } +} diff --git a/gui/app_instance.py b/gui/app_instance.py new file mode 100644 index 0000000..9669a00 --- /dev/null +++ b/gui/app_instance.py @@ -0,0 +1,16 @@ +# gui/app_instance.py + +import dash +import dash_bootstrap_components as dbc +from flask import Flask + +flask_server = Flask(__name__) + +app = dash.Dash( + __name__, + suppress_callback_exceptions=True, + external_stylesheets=[dbc.themes.BOOTSTRAP], + meta_tags=[{"name": "viewport", "content": "width=device-width, initial-scale=1"}], + title="LogAI", + server=flask_server +) diff --git a/gui/application.py b/gui/application.py index 1ec433f..106d05f 100755 --- a/gui/application.py +++ b/gui/application.py @@ -7,27 +7,38 @@ # import dash import dash_bootstrap_components as dbc -from dash import dcc, html, Input, Output, callback +from dash import dcc, html, Input, Output, State, callback +from gui.app_instance import app, flask_server from gui.pages.utils import create_banner from gui.pages import pattern as pattern_page +from gui.pages import telemetry as telemetry_page from gui.pages import anomaly_detection as anomaly_page from gui.pages import clustering as clustering_page -from gui.callbacks import pattern, anomaly_detection, clustering, utils - +from gui.pages import ai_analysis as ai_analysis_page +from gui.callbacks import pattern, telemetry, anomaly_detection, clustering, utils, ai_analysis +from gui.file_manager import FileManager +from flask import Flask +flask_server = Flask(__name__) app = dash.Dash( __name__, + 
suppress_callback_exceptions=True, external_stylesheets=[dbc.themes.BOOTSTRAP], meta_tags=[{"name": "viewport", "content": "width=device-width, initial-scale=1"}], title="LogAI", + server=flask_server ) -server = app.server -app.config["suppress_callback_exceptions"] = True +#server = app.server +#app.config["suppress_callback_exceptions"] = True + +file_manager = FileManager() +file_manager.clean_temp_files() app.layout = dbc.Container( [ dcc.Location(id="url", refresh=False), + html.Div(id='restore-dropdown-value', style={'display':'none'}), dbc.Container(id="page-content", fluid=True), ], fluid=True, @@ -36,7 +47,11 @@ @callback(Output("page-content", "children"), [Input("url", "pathname")]) def display_page(pathname): - if pathname == "/logai/pattern": + if pathname == "/logai/telemetry": + return dbc.Container( + [dbc.Row(create_banner(app)), telemetry_page.layout], fluid=True + ) + elif pathname == "/logai/pattern": return dbc.Container( [dbc.Row(create_banner(app)), pattern_page.layout], fluid=True ) @@ -48,6 +63,10 @@ def display_page(pathname): return dbc.Container( [dbc.Row(dbc.Col(create_banner(app))), clustering_page.layout], fluid=True ) + elif pathname == "/logai/ai_analysis": + return dbc.Container( + [dbc.Row(dbc.Col(create_banner(app))), ai_analysis_page.layout], fluid=True + ) else: return dbc.Container( [dbc.Row(dbc.Col(create_banner(app))), pattern_page.layout], fluid=True @@ -55,4 +74,6 @@ def display_page(pathname): if __name__ == "__main__": - app.run_server(debug=False) + #import nltk + #nltk.download('punkt_tab') + app.run(debug=True) diff --git a/gui/assets/drain3.ini b/gui/assets/drain3.ini new file mode 100644 index 0000000..7020485 --- /dev/null +++ b/gui/assets/drain3.ini @@ -0,0 +1,30 @@ +[SNAPSHOT] +snapshot_interval_minutes = 10 +compress_state = True + +[MASKING] +masking = [ + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(([0-9a-f]{2,}:){3,}([0-9a-f]{2,}))((?=[^A-Za-z0-9])|$)", "mask_with": "ID"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9a-f]{6,} ?){3,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9A-F]{4} ?){4,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(0x[a-f0-9A-F]+)((?=[^A-Za-z0-9])|$)", "mask_with": "HEX"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "mask_with": "NUM"}, + {"regex_pattern":"(?<=executed cmd )(\".+?\")", "mask_with": "CMD"} + ] +mask_prefix = <: +mask_suffix = :> + +[DRAIN] +# engine is Optional parameter. Engine will be "Drain" if the engine argument is not specified. +# engine has two options: 'Drain' and 'JaccardDrain'. 
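+# Note: sim_th is Drain's similarity threshold for matching a line to an existing cluster; depth controls the parse-tree depth (how many leading tokens are considered before similarity matching).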
+# engine = Drain +sim_th = 0.4 +depth = 4 +max_children = 100 +max_clusters = 1024 +extra_delimiters = ["_"] + +[PROFILING] +enabled = True +report_sec = 30 diff --git a/gui/assets/favicon.ico b/gui/assets/favicon.ico deleted file mode 100644 index e1871ab..0000000 Binary files a/gui/assets/favicon.ico and /dev/null differ diff --git a/gui/assets/logai.css b/gui/assets/logai.css index 0c7b5c8..901e575 100644 --- a/gui/assets/logai.css +++ b/gui/assets/logai.css @@ -36,9 +36,10 @@ hr { } .banner { - height: 5.5rem; + height: 10rem; padding: 0rem 2rem 0rem; - background-color: white; + color: white; + background-color: black; display: flex; flex-direction: row; align-items: center; @@ -46,7 +47,7 @@ hr { .banner Img { position: relative; - height: 4.5rem; + height: 10rem; width: auto; margin-left: 2%; } diff --git a/gui/assets/logai.png b/gui/assets/logai.png deleted file mode 100644 index 2760b1a..0000000 Binary files a/gui/assets/logai.png and /dev/null differ diff --git a/gui/assets/logai_logo.jpg b/gui/assets/logai_logo.jpg deleted file mode 100644 index bbaaac4..0000000 Binary files a/gui/assets/logai_logo.jpg and /dev/null differ diff --git a/gui/assets/logai_logo.png b/gui/assets/logai_logo.png new file mode 100644 index 0000000..7326b12 Binary files /dev/null and b/gui/assets/logai_logo.png differ diff --git a/gui/callbacks/ai_analysis.py b/gui/callbacks/ai_analysis.py new file mode 100644 index 0000000..6110b99 --- /dev/null +++ b/gui/callbacks/ai_analysis.py @@ -0,0 +1,46 @@ +from dash import dash_table +from dash import Input, Output, callback +from gui.app_instance import app +from gui.diagnose_broadband import run_diagnosis + +@callback( + Output("ai-script-output", "children"), + Input("run-ai-script-btn", "n_clicks"), + prevent_initial_call=True, +) +def run_ai_script(n_clicks): + result = run_diagnosis( + model_dir="/Users/sivasubramanianm/Downloads/HackFest-06/Training/models/TinyLlama/TinyLlama-1.1B-Chat-v1.0", + test_file="app_uploaded_files/telemetry/Telemetry2_report.xlsx", + column_file="column_list.txt" + ) + # Render as Dash DataTable + return dash_table.DataTable( + data=result, + columns=[ + {"name": "Time", "id": "Time"}, + {"name": "MAC", "id": "MAC"}, + {"name": "Diagnosis", "id": "Diagnosis"} + ], + style_table={ + "overflowX": "auto", + "border": "1px solid #ccc", + }, + style_cell={ + "textAlign": "left", + "whiteSpace": "normal", + "padding": "8px", + "fontFamily": "Arial", + "fontSize": "14px", + }, + style_header={ + 'backgroundColor': 'rgb(50, 50, 50)', + 'color': 'black' + }, + style_data={ + 'backgroundColor': 'white', + 'color': 'black' + }, + page_size=10, + ) + diff --git a/gui/callbacks/anomaly_detection.py b/gui/callbacks/anomaly_detection.py index c562eb5..65e015c 100755 --- a/gui/callbacks/anomaly_detection.py +++ b/gui/callbacks/anomaly_detection.py @@ -29,8 +29,6 @@ from ..pages.utils import create_param_table log_anomaly_demo = LogAnomaly() -file_manager = FileManager() - def _ad_config_sample(): config = WorkFlowConfig( @@ -59,6 +57,14 @@ def create_attribute_component(attributes): }, style_header_conditional=[{"textAlign": "left"}], style_cell_conditional=[{"textAlign": "left"}], + style_header={ + 'backgroundColor': 'rgb(50, 50, 50)', + 'color': 'white' + }, + style_data={ + 'backgroundColor': 'white', + 'color': 'black' + }, ) # print(table) return html.Div(children=[table, html.Div(id="table-dropdown-container")]) @@ -73,31 +79,31 @@ def create_attribute_component(attributes): Input("anomaly_exception_modal_close", "n_clicks"), ], [ - 
State("log-type-select", "value"), - State("attribute-name-options", "value"), + #State("log-type-select", "value"), + #State("attribute-name-options", "value"), State("file-select", "value"), - State("parsing-algo-select", "value"), - State("vectorization-algo-select", "value"), - State("categorical-encoder-select", "value"), + #State("parsing-algo-select", "value"), + #State("vectorization-algo-select", "value"), + #State("categorical-encoder-select", "value"), State("ad-algo-select", "value"), State("time-interval", "value"), State("ad-param-table", "children"), - State("ad-parsing-param-table", "children"), + #State("ad-parsing-param-table", "children"), ], ) def click_run( btn_click, modal_close, - log_type, - attributes, + #log_type, + #attributes, filename, - parsing_algo, - vectorization_algo, - categorical_encoder, + #parsing_algo, + #vectorization_algo, + #categorical_encoder, ad_algo, time_interval, ad_param_table, - parsing_param_table, + #parsing_param_table, ): ctx = dash.callback_context if ctx.triggered: @@ -107,7 +113,6 @@ def click_run( interval_map = {0: "1s", 1: "1min", 2: "1h", 3: "1d"} freq = interval_map[time_interval] - file_path = os.path.join(file_manager.base_directory, filename) ad_params = log_anomaly_demo.parse_parameters( param_info=log_anomaly_demo.get_parameter_info(ad_algo), params={ @@ -116,31 +121,28 @@ def click_run( if p["Parameter"] }, ) - parsing_params = LogPattern().parse_parameters( - param_info=LogPattern().get_parameter_info(parsing_algo), - params={ - p["Parameter"]: p["Value"] - for p in parsing_param_table["props"]["data"] - if p["Parameter"] - }, - ) - config = _ad_config_sample() - config.open_set_data_loader_config.filepath = ( - file_path # overwrite the file path. - ) - config.open_set_data_loader_config.dataset_name = log_type - config.feature_extractor_config.group_by_category = attributes - config.feature_extractor_config.group_by_time = freq - config.log_parser_config.parsing_algorithm = parsing_algo - - config_class = LogPattern().get_config_class(parsing_algo) - config.log_parser_config.parsing_algo_params = config_class.from_dict( - parsing_params - ) + file_manager = FileManager() + config_json = file_manager.load_config(filename) + #print(config_json, flush=True) + if config_json is not None: + config = WorkFlowConfig.from_dict(config_json) + #print(config, flush=True) + + file_path = os.path.join(file_manager.merged_logs_path, filename) + + config.data_loader_config.filepath = file_path + + #config.log_vectorizer_config = VectorizerConfig() + #config.log_vectorizer_config.algo_name = "tfidf" + + #config.categorical_encoder_config = CategoricalEncoderConfig() + #config.categorical_encoder_config.algo_name = "one_hot_encoder" - config.log_vectorizer_config.algo_name = vectorization_algo - config.categorical_encoder_config.algo_name = categorical_encoder + #config.clustering_config = ClusteringConfig() + #config.clustering_config.algo_name = "DBSCAN" + + #config.feature_extractor_config.group_by_time = freq config.anomaly_detection_config.algo_name = ad_algo config_class = log_anomaly_demo.get_config_class(ad_algo) @@ -149,7 +151,7 @@ def click_run( ) log_anomaly_demo.execute_anomaly_detection(config) - + #print("anamoloy detec: ", log_anomaly_demo.get_attributes()) return ( create_attribute_component(log_anomaly_demo.get_attributes()), False, @@ -249,6 +251,14 @@ def update_counter_table(data): page_action="native", page_size=20, page_current=0, + style_header={ + 'backgroundColor': 'rgb(50, 50, 50)', + 'color': 'white' + }, + 
style_data={ + 'backgroundColor': 'white', + 'color': 'black' + }, ) else: return dash_table.DataTable() diff --git a/gui/callbacks/clustering.py b/gui/callbacks/clustering.py index 7b6ac29..db22b0c 100755 --- a/gui/callbacks/clustering.py +++ b/gui/callbacks/clustering.py @@ -5,8 +5,13 @@ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause # # +import csv +import logging import os - +import re +import sys +from drain3 import TemplateMiner +from drain3.template_miner_config import TemplateMinerConfig import dash import pandas as pd from dash import html, Input, Output, State, callback, dash_table @@ -27,7 +32,6 @@ from ..pages.utils import create_param_table log_clustering = Clustering() -file_manager = FileManager() def _clustering_config(): @@ -44,7 +48,7 @@ def _clustering_config(): def create_attribute_component(attributes): - print(attributes) + #print(attributes) table = dash_table.DataTable( id="cluster-attribute-table", data=attributes.iloc[:1].to_dict("records"), @@ -58,6 +62,14 @@ def create_attribute_component(attributes): }, style_header_conditional=[{"textAlign": "left"}], style_cell_conditional=[{"textAlign": "left"}], + style_header={ + 'backgroundColor': 'rgb(50, 50, 50)', + 'color': 'white' + }, + style_data={ + 'backgroundColor': 'white', + 'color': 'black' + }, ) return html.Div(children=[table, html.Div(id="table-dropdown-container")]) @@ -72,74 +84,56 @@ def create_attribute_component(attributes): Input("clustering_exception_modal_close", "n_clicks"), ], [ - State("log-type-select", "value"), - State("attribute-name-options", "value"), + #State("log-type-select", "value"), + #State("attribute-name-options", "value"), State("file-select", "value"), - State("parsing-algo-select", "value"), - State("vectorization-algo-select", "value"), - State("categorical-encoder-select", "value"), - State("clustering-algo-select", "value"), - State("clustering-param-table", "children"), - State("clustering-parsing-param-table", "children"), + #State("parsing-algo-select", "value"), + #State("vectorization-algo-select", "value"), + #State("categorical-encoder-select", "value"), + #State("clustering-algo-select", "value"), + #State("clustering-param-table", "children"), + #State("clustering-parsing-param-table", "children"), ], ) def click_run( btn_click, modal_close, - log_type, - attributes, + #log_type, + #attributes, filename, - parsing_algo, - vectorization_algo, - categorical_encoder, - clustering_algo, - clustering_param_table, - parsing_param_table, + #parsing_algo, + #vectorization_algo, + #categorical_encoder, + #clustering_algo, + #clustering_param_table, + #parsing_param_table, ): ctx = dash.callback_context if ctx.triggered: prop_id = ctx.triggered[0]["prop_id"].split(".")[0] if prop_id == "clustering-btn": try: - file_path = os.path.join(file_manager.base_directory, filename) - clustering_params = log_clustering.parse_parameters( - param_info=log_clustering.get_parameter_info(clustering_algo), - params={ - p["Parameter"]: p["Value"] - for p in clustering_param_table["props"]["data"] - if p["Parameter"] - }, - ) - parsing_params = LogPattern().parse_parameters( - param_info=LogPattern().get_parameter_info(parsing_algo), - params={ - p["Parameter"]: p["Value"] - for p in parsing_param_table["props"]["data"] - if p["Parameter"] - }, - ) + file_manager = FileManager() + config_json = file_manager.load_config(filename) + #print(config_json, flush=True) + if config_json is not None: + # in log clustering disable anomaly 
detection + config_json['anomaly_detection_config'] = None + config = WorkFlowConfig.from_dict(config_json) + #print(config, flush=True) - config = _clustering_config() - config.open_set_data_loader_config.filepath = ( - file_path # overwrite the file path. - ) - config.open_set_data_loader_config.dataset_name = log_type - config.feature_extractor_config.group_by_category = attributes - config.log_parser_config.parsing_algorithm = parsing_algo + file_path = os.path.join(file_manager.merged_logs_path, filename) - config_class = LogPattern().get_config_class(parsing_algo) - config.log_parser_config.parsing_algo_params = config_class.from_dict( - parsing_params - ) + config.data_loader_config.filepath = file_path - config.log_vectorizer_config.algo_name = vectorization_algo - config.categorical_encoder_config.algo_name = categorical_encoder - config.clustering_config.algo_name = clustering_algo + config.log_vectorizer_config = VectorizerConfig() + config.log_vectorizer_config.algo_name = "tfidf" - config_class = log_clustering.get_config_class(clustering_algo) - config.clustering_config.algo_params = config_class.from_dict( - clustering_params - ) + config.categorical_encoder_config = CategoricalEncoderConfig() + config.categorical_encoder_config.algo_name = "one_hot_encoder" + + config.clustering_config = ClusteringConfig() + config.clustering_config.algo_name = "DBSCAN" log_clustering.execute_clustering(config) @@ -177,7 +171,7 @@ def generate_pie_chart(df): Output("clustering-loglines", "children"), [Input("cluster-hist", "clickData")] ) def update_logline_list(data): - if len(data) > 0: + if data and len(data) > 0: cluster_label = data["points"][0]["label"] # return html.Div(str(data['points'][0])) # for debug df = log_clustering.get_loglines(cluster_label) @@ -199,6 +193,14 @@ def update_logline_list(data): page_action="native", page_size=20, page_current=0, + style_header={ + 'backgroundColor': 'rgb(50, 50, 50)', + 'color': 'white' + }, + style_data={ + 'backgroundColor': 'white', + 'color': 'black' + }, ) else: return dash_table.DataTable() @@ -245,3 +247,154 @@ def select_parsing_algorithm(algorithm): param_info = LogPattern().get_parameter_info(algorithm) param_table = create_param_table(param_info) return param_table + + +def combine_logs_by_timestamp(input_folder, output_log_path): + """ + Combines log lines from all files in input_folder. + Lines with valid timestamps are sorted by timestamp. + Lines without timestamps are appended at the end. 
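+ Timestamps are expected to start with an ISO-8601 prefix (YYYY-MM-DDTHH:MM:SS), as matched by timestamp_pattern below.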
+ """ + timestamp_pattern = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}") + lines_with_ts = [] + lines_without_ts = [] + + for root, _, files in os.walk(input_folder): + for file in files: + full_path = os.path.join(root, file) + try: + with open(full_path, 'r', encoding='utf-8', errors='ignore') as f: + for line in f: + line = line.strip() + if not line: + continue + match = timestamp_pattern.match(line) + if match: + lines_with_ts.append((match.group(), line)) + else: + lines_without_ts.append(line) + except Exception: + continue # Skip unreadable files + + # Sort by timestamp + lines_with_ts.sort(key=lambda x: x[0]) + + # Write sorted + unsorted lines to output + with open(output_log_path, 'w', encoding='utf-8') as out_file: + for _, line in lines_with_ts: + out_file.write(line + "\n") + for line in lines_without_ts: + out_file.write(line + "\n") + + return output_log_path + + +def process_with_drain3(input_log_file, output_csv_path): + + logging.basicConfig(stream=sys.stdout, level=logging.WARNING) + + config = TemplateMinerConfig() + # Optional: comment out or fix config.load if it causes issues + print("before config load") + config.load("./gui/assets/drain3.ini") + print("after config load") + config.profiling_enabled = False + config.snapshot_interval_minutes = 0 # Disable snapshotting + config.save_snapshot = False + template_miner = TemplateMiner(config=config) + + + timestamp_pattern = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}") + processed = [] + + with open(input_log_file, 'r', encoding='utf-8', errors='ignore') as f: + for idx, line in enumerate(f, 1): + line = line.strip() + if not line: continue + match = timestamp_pattern.match(line) + timestamp = match.group() if match else "" + result = template_miner.add_log_message(line) + if result: + processed.append({ + "timestamp": timestamp, + "cluster_id": result["cluster_id"], + "template": result["template_mined"], + "log": line + }) + if idx % 1000 == 0: + print(f"Processed {idx} lines...") + print(f"Done: processed {len(processed)} lines. 
Now writing CSV.") + + with open(output_csv_path, 'w', newline='', encoding='utf-8') as cf: + writer = csv.DictWriter(cf, fieldnames=["timestamp", "cluster_id", "template", "log"]) + writer.writeheader() + for row in processed: + writer.writerow(row) + + print("CSV written:", output_csv_path) + + + + +@callback( +Output("pattern-graph_total", "figure"), +#Output("status-msg", "children"), +[ +Input("clustering-btn-all", "n_clicks"), +Input("clustering_exception_modal_close", "n_clicks"), +], +prevent_initial_call=True +) +def on_run_click(n_clicks, modal_close): + try: + file_manager = FileManager() + # The folder with uploaded logs + output_log_path = os.path.join(file_manager.merged_logs_path, 'combined.log') # Combined output log + output_csv_path = os.path.join(file_manager.merged_logs_path, 'processed.csv') # Drain3 output CSV + # Step 1: Combine logs or do pre-processing + print("Combining logs from:", file_manager.merged_logs_path) + combine_logs_by_timestamp(file_manager.merged_logs_path,output_log_path)# e.g., combine logs + + # Step 2: Run Drain3 or pattern mining + print("Processing logs with Drain3...") + process_with_drain3(output_log_path,output_csv_path) # generates "processed.csv" + + # Step 3: Load resulting CSV + df = pd.read_csv(output_csv_path) + print("read csv.") + # Example: Plot count of logs per template + fig = px.scatter(df, x="cluster_id",title="Log Template Frequency",custom_data=["cluster_id", "template", "log"]) + print("Plotting done.") + return fig + + + except Exception as e: + # Only one Output is declared on this callback, so return a single value and log the error. + print(f"❌ Error: {str(e)}") + return dash.no_update + +@callback( + Output("cluster-tmp", "children"), + Input("pattern-graph_total", "clickData"), + prevent_initial_call=True + ) + +def show_cluster_details(clickData): + if not clickData or "points" not in clickData: + return "No data selected." 
+ point = clickData["points"][0] + # customdata = [cluster_id, template, log] + if "customdata" in point and point["customdata"]: + cluster_id, template, log = point["customdata"] + return html.Div([ + html.H4(f"Cluster ID: {cluster_id}"), + html.B("Template: "), html.Pre(template, style={"whiteSpace": "pre-wrap"}), html.Br(), + html.B("Log: "), html.Pre(log, style={"whiteSpace": "pre-wrap"}), + ], style={'backgroundColor': '#FFFFFF', 'padding': '1em', 'borderRadius': '10px'}) + else: + return html.Div([ + "No cluster data found in selection.", + html.Pre(str(point), style={"color": "crimson", "fontSize": "smaller"}) + ]) + + + + diff --git a/gui/callbacks/pattern.py b/gui/callbacks/pattern.py index 067380c..702e7e6 100755 --- a/gui/callbacks/pattern.py +++ b/gui/callbacks/pattern.py @@ -5,11 +5,17 @@ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause # # +import csv +import logging import os +import re +import sys import dash +from flask import app import pandas as pd import plotly.express as px - +from drain3 import TemplateMiner +from drain3.template_miner_config import TemplateMinerConfig from dash import html, Input, Output, State, callback, dash_table from logai.applications.application_interfaces import ( @@ -22,18 +28,14 @@ from gui.file_manager import FileManager from gui.demo.log_pattern import LogPattern from logai.dataloader.openset_data_loader import ( - OpenSetDataLoaderConfig, - OpenSetDataLoader, + FileDataLoader, ) from ..pages.utils import create_param_table log_pattern_demo = LogPattern() -file_manager = FileManager() - - def _config_sample(): config = WorkFlowConfig( - open_set_data_loader_config=OpenSetDataLoaderConfig(), + data_loader_config=FileDataLoader(), feature_extractor_config=FeatureExtractorConfig(), preprocessor_config=PreprocessorConfig( custom_delimiters_regex=[":", ",", "=", "\t"] @@ -63,7 +65,7 @@ def create_attribute_component(attributes): ) return html.Div(children=[table, html.Div(id="table-dropdown-container")]) - +""" @callback( Output("attribute-name-options", "options"), Output("attribute-name-options", "value"), @@ -89,7 +91,7 @@ def get_attributes(log_type): options = [{"label": str(c), "value": str(c)} for c in attributes] values = [str(c) for c in attributes] return options, values - +""" @callback( Output("attribute-options", "children"), @@ -100,48 +102,37 @@ def get_attributes(log_type): Input("pattern_exception_modal_close", "n_clicks"), ], [ - State("log-type-select", "value"), - State("attribute-name-options", "value"), State("file-select", "value"), - State("parsing-algo-select", "value"), - State("parsing-param-table", "children"), ], ) def click_run( - btn_click, modal_close, log_type, attributes, filename, parsing_algo, param_table + btn_click, modal_close, filename ): ctx = dash.callback_context try: if ctx.triggered: prop_id = ctx.triggered[0]["prop_id"].split(".")[0] if prop_id == "pattern-btn": - # TODO: Build WorkFlowConfig - file_path = os.path.join(file_manager.base_directory, filename) - params = log_pattern_demo.parse_parameters( - param_info=log_pattern_demo.get_parameter_info(parsing_algo), - params={ - p["Parameter"]: p["Value"] - for p in param_table["props"]["data"] - if p["Parameter"] - }, - ) - - config = _config_sample() - config.open_set_data_loader_config.filepath = ( - file_path # overwrite the file path. 
- ) - config.open_set_data_loader_config.dataset_name = log_type - config.log_parser_config.parsing_algorithm = parsing_algo - - config_class = log_pattern_demo.get_config_class(parsing_algo) - config.log_parser_config.parsing_algo_params = config_class.from_dict( - params - ) - + file_manager = FileManager() + config_json = file_manager.load_config(filename) + #print(config_json, flush=True) + if config_json is not None: + # in log summarization, disable clustering and anomaly detection + config_json['anomaly_detection_config'] = None + config_json['clustering_config'] = None + config = WorkFlowConfig.from_dict(config_json) + #print(config, flush=True) + + file_path = os.path.join(file_manager.merged_logs_path, filename) + if not os.path.getsize(file_path): + raise RuntimeError("File length is zero!") + + config.data_loader_config.filepath = file_path log_pattern_demo.execute_auto_parsing(config) + return ( create_attribute_component( - log_pattern_demo.get_attributes()[attributes] + log_pattern_demo.get_attributes() ), False, "", @@ -261,8 +252,11 @@ def update_summary_graph(data): [Input("summary-scatter", "clickData"), Input("time-interval", "value")], prevent_initial_call=True, ) + def update_y_timeseries(data, interval): - print(data) + #print(data) + if not data: + return interval_map = {0: "1s", 1: "1min", 2: "1h", 3: "1d"} pattern = data["points"][0]["customdata"] freq = interval_map[interval] @@ -270,7 +264,7 @@ def update_y_timeseries(data, interval): dff = result_df[result_df["parsed_logline"] == pattern][ ["timestamp", "parsed_logline"] ] - + #print(dff) ts_df = ( dff[["timestamp", "parsed_logline"]] .groupby(pd.Grouper(key="timestamp", freq=freq, offset=0, label="right")) @@ -329,6 +323,159 @@ def summary(data): Output("parsing-param-table", "children"), Input("parsing-algo-select", "value") ) def select_parsing_algorithm(algorithm): - param_info = log_pattern_demo.get_parameter_info(algorithm) + param_info = None + if log_pattern_demo is not None: + param_info = log_pattern_demo.get_parameter_info(algorithm) param_table = create_param_table(param_info) return param_table + +''' +def combine_logs_by_timestamp(input_folder, output_log_path): + """ + Combines log lines from all files in input_folder. + Lines with valid timestamps are sorted by timestamp. + Lines without timestamps are appended at the end. 
+ """ + timestamp_pattern = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}") + lines_with_ts = [] + lines_without_ts = [] + + for root, _, files in os.walk(input_folder): + for file in files: + full_path = os.path.join(root, file) + try: + with open(full_path, 'r', encoding='utf-8', errors='ignore') as f: + for line in f: + line = line.strip() + if not line: + continue + match = timestamp_pattern.match(line) + if match: + lines_with_ts.append((match.group(), line)) + else: + lines_without_ts.append(line) + except Exception: + continue # Skip unreadable files + + # Sort by timestamp + lines_with_ts.sort(key=lambda x: x[0]) + + # Write sorted + unsorted lines to output + with open(output_log_path, 'w', encoding='utf-8') as out_file: + for _, line in lines_with_ts: + out_file.write(line + "\n") + for line in lines_without_ts: + out_file.write(line + "\n") + + return output_log_path + + +def process_with_drain3(input_log_file, output_csv_path): + + logging.basicConfig(stream=sys.stdout, level=logging.WARNING) + + config = TemplateMinerConfig() + # Optional: comment out or fix config.load if it causes issues + print("before config load") + config.load("./gui/assets/drain3.ini") + print("after config load") + config.profiling_enabled = False + config.snapshot_interval_minutes = 0 # Disable snapshotting + config.save_snapshot = False + template_miner = TemplateMiner(config=config) + + + timestamp_pattern = re.compile(r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}") + processed = [] + + with open(input_log_file, 'r', encoding='utf-8', errors='ignore') as f: + for idx, line in enumerate(f, 1): + line = line.strip() + if not line: continue + match = timestamp_pattern.match(line) + timestamp = match.group() if match else "" + result = template_miner.add_log_message(line) + if result: + processed.append({ + "timestamp": timestamp, + "cluster_id": result["cluster_id"], + "template": result["template_mined"], + "log": line + }) + if idx % 1000 == 0: + print(f"Processed {idx} lines...") + print(f"Done: processed {len(processed)} lines. 
Now writing CSV.") + + with open(output_csv_path, 'w', newline='', encoding='utf-8') as cf: + writer = csv.DictWriter(cf, fieldnames=["timestamp", "cluster_id", "template", "log"]) + writer.writeheader() + for row in processed: + writer.writerow(row) + + print("CSV written:", output_csv_path) + + + + +@callback( +Output("pattern-graph_total", "figure"), +#Output("status-msg", "children"), +[ +Input("pattern-btn-all", "n_clicks"), +Input("pattern_exception_modal_close", "n_clicks"), +], +prevent_initial_call=True +) +def on_run_click(n_clicks, modal_close): + try: + file_manager = FileManager() + # The folder with uploaded logs + output_log_path = os.path.join(file_manager.merged_logs_path, 'combined.log') # Combined output log + output_csv_path = os.path.join(file_manager.merged_logs_path, 'processed.csv') # Drain3 output CSV + # Step 1: Combine logs or do pre-processing + print("Combining logs from:", file_manager.merged_logs_path) + combine_logs_by_timestamp(file_manager.merged_logs_path,output_log_path)# e.g., combine logs + + # Step 2: Run Drain3 or pattern mining + print("Processing logs with Drain3...") + process_with_drain3(output_log_path,output_csv_path) # generates "processed.csv" + + # Step 3: Load resulting CSV + df = pd.read_csv(output_csv_path) + print("read csv.") + # Example: Plot count of logs per template + fig = px.scatter(df, x="cluster_id",title="Log Template Frequency",custom_data=["cluster_id", "template", "log"]) + print("Plotting done.") + return fig + + + except Exception as e: + return dash.no_update, f"❌ Error: {str(e)}" + +@callback( + Output("cluster-tmp", "children"), + Input("pattern-graph_total", "clickData"), + prevent_initial_call=True + ) + +def show_cluster_details(clickData): + if not clickData or "points" not in clickData: + return "No data selected." + point = clickData["points"][0] + # customdata = [cluster_id, template, log] + if "customdata" in point and point["customdata"]: + cluster_id, template, log = point["customdata"] + return html.Div([ + html.H4(f"Cluster ID: {cluster_id}"), + html.B("Template: "), html.Pre(template, style={"whiteSpace": "pre-wrap"}), html.Br(), + html.B("Log: "), html.Pre(log, style={"whiteSpace": "pre-wrap"}), + ], style={'backgroundColor': '#FFFFFF', 'padding': '1em', 'borderRadius': '10px'}) + else: + return html.Div([ + "No cluster data found in selection.", + html.Pre(str(point), style={"color": "crimson", "fontSize": "smaller"}) + ]) + + ''' + + diff --git a/gui/callbacks/telemetry.py b/gui/callbacks/telemetry.py new file mode 100755 index 0000000..f62df9a --- /dev/null +++ b/gui/callbacks/telemetry.py @@ -0,0 +1,270 @@ +# +# Copyright (c) 2023 Salesforce.com, inc. +# All rights reserved. 
+# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause +# +# +import os +import dash +import pandas as pd +import plotly.express as px +from dash import dcc +from dash import html, Input, Output, State, callback, dash_table +import plotly.graph_objs as go + +from gui.file_manager import FileManager + +from logai.preprocess.telemetry_parser import Telemetry2Parser +from logai.preprocess.telemetry_parser import DML + +telemetry_parser = Telemetry2Parser() +file_manager = FileManager() + +def create_summary_layout(data=pd.DataFrame()): + mac = telemetry_parser.get_telemetry_value(DML.MAC_ADDRESS) + serial = telemetry_parser.get_telemetry_value(DML.SERIAL_NUMBER) + sw_ver = telemetry_parser.get_telemetry_value(DML.SW_VERSION) + hw_ver = telemetry_parser.get_telemetry_value(DML.HW_VERSION) + model = telemetry_parser.get_telemetry_value(DML.MODEL_NAME) + manuf = telemetry_parser.get_telemetry_value(DML.MANUFACTURER, index=1) + + # Summary content + summary_layout = html.Div([ + html.P(f"MAC Address\t: {mac}"), + html.P(f"Serial Number\t: {serial}"), + html.P(f"Software Version\t: {sw_ver}"), + html.P(f"Hardware Version\t: {hw_ver}"), + html.P(f"Model Name\t: {model}"), + html.P(f"Manufacturer\t: {manuf}"), + ]) + return summary_layout + +def create_status_layout(data=pd.DataFrame()): + wan_type = telemetry_parser.get_telemetry_value(DML.WAN_MODE) + radio1_en = telemetry_parser.get_telemetry_value(DML.RADIO1_EN) + radio2_en = telemetry_parser.get_telemetry_value(DML.RADIO2_EN) + ap1_en = telemetry_parser.get_telemetry_value(DML.AP1_EN) + ap2_en = telemetry_parser.get_telemetry_value(DML.AP2_EN) + airties = telemetry_parser.get_telemetry_value(DML.AIETIES_EDGE) + # Summary content + summary_layout = html.Div([ + html.P(f"WAN Mode\t: {wan_type}"), + html.P(f"Airties Enable Status\t: {airties}"), + html.P(f"Radio 1 Enabled\t: {radio1_en}"), + html.P(f"Radio 2 Enabled\t: {radio2_en}"), + html.P(f"SSID 1 Enabled\t: {ap1_en}"), + html.P(f"SSID 2 Enabled\t: {ap2_en}"), + ]) + return summary_layout + +def parse_size(value): + if value is None: return None + if isinstance(value, (int, float)): return float(value) + value = value.strip() + try: + if value.lower().endswith("kb"): + return float(value[:-2].strip()) + elif value.lower().endswith("m"): + return float(value[:-1].strip()) * 1024 + return float(value) + except Exception: + return None + +def create_mem_graph_layout(data): + # Chart content + time = telemetry_parser.get_timestamp() + + mem_avail = telemetry_parser.get_telemetry_col(DML.MEM_AVAILABLE) + free_mem = telemetry_parser.get_telemetry_col(DML.MEM_FREE) + + if time is None or mem_avail is None or free_mem is None: + return dcc.Graph() + + mem_avail = mem_avail.apply(parse_size) + free_mem = free_mem.apply(parse_size) + + chart = dcc.Graph( + figure={ + "data": [ + go.Scatter(x=time, y=mem_avail, name="Available", mode="lines"), + go.Scatter(x=time, y=free_mem, name="Free", mode="lines"), + ], + "layout": go.Layout( + title="Memory Split", + xaxis_title="Report Time", + yaxis_title="Value", + hovermode="x unified" + ) + } + ) + return chart + +def create_cpu_graph_layout(data): + # Chart content + time = telemetry_parser.get_timestamp() + + cpu_usage = telemetry_parser.get_telemetry_col(DML.CPU_USAGE) + cpu_temp = telemetry_parser.get_telemetry_col(DML.CPU_TEMP) + + if cpu_usage is None or cpu_temp is None: + return dcc.Graph() + + chart = dcc.Graph( + figure={ + "data": [ + go.Scatter(x=time, y=cpu_usage, 
name="CPU Usage", mode="lines"), + go.Scatter(x=time, y=cpu_temp, name="CPU Temp", mode="lines"), + ], + "layout": go.Layout( + title="CPU usage vs temp", + xaxis_title="Report Time", + yaxis_title="Value", + hovermode="x unified" + ) + } + ) + return chart + + +def create_wan_graph_layout(data): + # Chart content + time = telemetry_parser.get_timestamp() + + byte_rcvd = telemetry_parser.get_telemetry_col(DML.WAN_BYTES_RCVD) + byte_sent = telemetry_parser.get_telemetry_col(DML.WAN_BYTES_SENT) + pkt_rcvd = telemetry_parser.get_telemetry_col(DML.WAN_PKT_RCVD) + pkt_sent = telemetry_parser.get_telemetry_col(DML.WAN_PKT_SENT) + + if byte_rcvd is None or byte_sent is None or pkt_rcvd is None or pkt_sent is None: + return dcc.Graph() + + chart = dcc.Graph( + figure={ + "data": [ + go.Scatter(x=time, y=byte_sent, name="Bytes Sent", mode="lines"), + go.Scatter(x=time, y=byte_rcvd, name="Bytes Received", mode="lines"), + go.Scatter(x=time, y=pkt_sent, name="Packet Sent", mode="lines"), + go.Scatter(x=time, y=pkt_rcvd, name="Packet Received", mode="lines"), + ], + "layout": go.Layout( + title="WAN stats", + xaxis_title="Report Time", + yaxis_title="Value", + hovermode="x unified" + ) + } + ) + return chart + +def create_radio_stat_graph_layout(data): + # Chart content + time = telemetry_parser.get_timestamp() + + byte_rcvd = telemetry_parser.get_telemetry_col(DML.SSID1_BYTE_RCVD) + byte_sent = telemetry_parser.get_telemetry_col(DML.SSID1_BYTE_SENT) + pkt_rcvd = telemetry_parser.get_telemetry_col(DML.SSID1_PKT_RCVD) + pkt_sent = telemetry_parser.get_telemetry_col(DML.SSID1_PKT_SENT) + err_rcvd = telemetry_parser.get_telemetry_col(DML.SSID1_ERROR_RCVD) + err_sent = telemetry_parser.get_telemetry_col(DML.SSID1_ERROR_SENT) + + if byte_rcvd is None or byte_sent is None or pkt_rcvd is None or pkt_sent is None: + return dcc.Graph() + + if err_rcvd is None or err_sent is None: + return dcc.Graph() + + chart = dcc.Graph( + figure={ + "data": [ + go.Scatter(x=time, y=byte_sent, name="Bytes Sent", mode="lines"), + go.Scatter(x=time, y=byte_rcvd, name="Bytes Received", mode="lines"), + go.Scatter(x=time, y=pkt_sent, name="Packet Sent", mode="lines"), + go.Scatter(x=time, y=pkt_rcvd, name="Packet Received", mode="lines"), + go.Scatter(x=time, y=err_sent, name="Error Sent", mode="lines"), + go.Scatter(x=time, y=err_rcvd, name="Error Received", mode="lines"), + ], + "layout": go.Layout( + title="SSID 1 stats", + xaxis_title="Report Time", + yaxis_title="Value", + hovermode="x unified" + ) + } + ) + return chart + +@callback( + Output("dev-summary-card", "children"), + 
Output("dev-status-card", "children"), + Output("cpu-chart-card", "children"), + Output("mem-chart-card", "children"), + Output("network-stat-chart-card", "children"), + Output("radio-stat-chart-card", "children"), + Output("telemetry_exception_modal", "is_open"), + Output("telemetry_exception_modal_content", "children"), + [ + Input("telemetry-btn", "n_clicks"), + Input("telemetry_exception_modal_close", "n_clicks"), + ], +) +def click_run( + btn_click, modal_close +): + ctx = dash.callback_context + try: + if ctx.triggered: + prop_id = ctx.triggered[0]["prop_id"].split(".")[0] + #print(prop_id) + if prop_id == "telemetry-btn": + filename = "telemetry2_0" + config_json = file_manager.load_config(filename) + #print(config_json, flush=True) + telemetry_parser.start_processing() + data = telemetry_parser.telemetry_report + cpu = create_cpu_graph_layout(data) + summary = create_summary_layout(data) + sts = create_status_layout(data) + + mem_graph = create_mem_graph_layout(data) + wan_stat = create_wan_graph_layout(data) + radio_stat = create_radio_stat_graph_layout(data) + + return summary, sts, cpu, mem_graph, wan_stat, radio_stat, False, "" + + elif prop_id == "pattern_exception_modal_close": + return html.Div(),html.Div(),dcc.Graph(),dcc.Graph(),dcc.Graph(), dcc.Graph(), False, "" + else: + return html.Div(),html.Div(),dcc.Graph(),dcc.Graph(),dcc.Graph(), dcc.Graph(), False, "" + except Exception as error: + return html.Div(),html.Div(),dcc.Graph(),dcc.Graph(),dcc.Graph(), dcc.Graph(), True, str(error) diff --git a/gui/callbacks/test_anamoly.py b/gui/callbacks/test_anamoly.py new file mode 100644 index 0000000..cad73ca --- /dev/null +++ b/gui/callbacks/test_anamoly.py @@ -0,0 +1,83 @@ +import os + +from logai.applications.application_interfaces import ( + WorkFlowConfig, +) +from gui.demo.log_clustering import Clustering +from gui.file_manager import FileManager + +#from logai.applications.auto_log_summarization import AutoLogSummarization +from logai.dataloader.openset_data_loader import ( + FileDataLoader, +) +#from ..pages.utils import create_param_table +from logai.analysis.anomaly_detector import AnomalyDetectionConfig +from logai.algorithms.anomaly_detection_algo.one_class_svm import OneClassSVMDetector, OneClassSVMParams +from logai.applications.log_anomaly_detection import LogAnomalyDetection +from logai.applications.application_interfaces import WorkFlowConfig +from logai.information_extraction.categorical_encoder import CategoricalEncoderConfig +from logai.information_extraction.log_vectorizer import VectorizerConfig +from gui.demo.log_anomaly import LogAnomaly + +log_anomaly_demo = LogAnomaly() + +def test_parse(): + file_manager = FileManager() + config_json = file_manager.load_config("WiFilog.txt") + print(config_json, flush=True) + if config_json is not None: + config = WorkFlowConfig.from_dict(config_json) + + file_path = os.path.join(file_manager.merged_logs_path, "WiFilog.txt") + """ + params = log_pattern_demo.parse_parameters( + param_info=log_pattern_demo.get_parameter_info(parsing_algo), + params={ + p["Parameter"]: p["Value"] + for p in param_table["props"]["data"] + if p["Parameter"] + }, + ) + """ + config.data_loader_config.filepath = file_path + #workflow_config = WorkFlowConfig.from_dict(config) + + #OneClassSVMDetector(OneClassSVMParams()) + #config.anomaly_detection_config. + #OneClassSVMDetector. 
+ #config.anomaly_detection_config = + # Create LogAnomalyDetection Application for given workflow_config + config.log_vectorizer_config = VectorizerConfig() + config.log_vectorizer_config.algo_name = "tfidf" + + config.categorical_encoder_config = CategoricalEncoderConfig() + config.categorical_encoder_config.algo_name = "one_hot_encoder" + print(config, flush=True) + app = LogAnomalyDetection(config) + + # Execute App + app.execute() + print("Labels \n", app.anomaly_labels) + print("Labels \n", app.anomaly_results) + #print("attributes", app.evaluation()) + """ + config.log_vectorizer_config = VectorizerConfig() + config.log_vectorizer_config.algo_name = "tfidf" + + config.categorical_encoder_config = CategoricalEncoderConfig() + config.categorical_encoder_config.algo_name = "one_hot_encoder" + + #config.clustering_config = ClusteringConfig() + #config.clustering_config.algo_name = "DBSCAN" + + interval_map = {0: "1s", 1: "1min", 2: "1h", 3: "1d"} + freq = interval_map[0] + config.feature_extractor_config.group_by_time = freq + config.anomaly_detection_config = AnomalyDetectionConfig(algo_name="logbert") + config.anomaly_detection_config.algo_name = "logbert" + + log_anomaly_demo.execute_anomaly_detection(config) + """ + +if __name__ == "__main__": + test_parse() \ No newline at end of file diff --git a/gui/callbacks/test_clustering.py b/gui/callbacks/test_clustering.py new file mode 100644 index 0000000..ebe1d63 --- /dev/null +++ b/gui/callbacks/test_clustering.py @@ -0,0 +1,56 @@ +import os + +from logai.applications.application_interfaces import ( + WorkFlowConfig, +) +from gui.demo.log_clustering import Clustering +from gui.file_manager import FileManager + +#from logai.applications.auto_log_summarization import AutoLogSummarization +from logai.dataloader.openset_data_loader import ( + FileDataLoader, +) +#from ..pages.utils import create_param_table +from logai.analysis.clustering import ClusteringConfig +from logai.information_extraction.categorical_encoder import CategoricalEncoderConfig +from logai.information_extraction.log_vectorizer import VectorizerConfig + +log_clustering = Clustering() + +def test_parse(): + file_manager = FileManager() + config_json = file_manager.load_config("WiFilog.txt") + print(config_json, flush=True) + if config_json is not None: + config = WorkFlowConfig.from_dict(config_json) + print(config, flush=True) + + file_path = os.path.join(file_manager.merged_logs_path, "WiFilog.txt") + """ + params = log_pattern_demo.parse_parameters( + param_info=log_pattern_demo.get_parameter_info(parsing_algo), + params={ + p["Parameter"]: p["Value"] + for p in param_table["props"]["data"] + if p["Parameter"] + }, + ) + """ + config.data_loader_config.filepath = file_path + + config.log_vectorizer_config = VectorizerConfig() + config.log_vectorizer_config.algo_name = "tfidf" + + config.categorical_encoder_config = CategoricalEncoderConfig() + config.categorical_encoder_config.algo_name = "one_hot_encoder" + + config.clustering_config = ClusteringConfig() + config.clustering_config.algo_name = "DBSCAN" + + log_clustering.execute_clustering(config) + #print("log pattern", log_pattern_demo) + print("attributes", log_clustering.get_attributes()) + + +if __name__ == "__main__": + test_parse() \ No newline at end of file diff --git a/gui/callbacks/test_pattern.py b/gui/callbacks/test_pattern.py new file mode 100644 index 0000000..e4b7c6d --- /dev/null +++ b/gui/callbacks/test_pattern.py @@ -0,0 +1,44 @@ +import os + +from logai.applications.application_interfaces import ( + 
WorkFlowConfig, +) + +from gui.file_manager import FileManager +from gui.demo.log_pattern import LogPattern +#from logai.applications.auto_log_summarization import AutoLogSummarization +from logai.dataloader.openset_data_loader import ( + FileDataLoader, +) +#from ..pages.utils import create_param_table + +log_pattern_demo = LogPattern() + +def test_parse(): + file_manager = FileManager() + config_json = file_manager.load_config("CUJOAGENT.log") + print(config_json, flush=True) + if config_json is not None: + config_json['anomaly_detection_config'] = None + config = WorkFlowConfig.from_dict(config_json) + print(config, flush=True) + + file_path = os.path.join(file_manager.merged_logs_path, "CUJOAGENT.log") + """ + params = log_pattern_demo.parse_parameters( + param_info=log_pattern_demo.get_parameter_info(parsing_algo), + params={ + p["Parameter"]: p["Value"] + for p in param_table["props"]["data"] + if p["Parameter"] + }, + ) + """ + config.data_loader_config.filepath = file_path + log_pattern_demo.execute_auto_parsing(config) + #print("log pattern", log_pattern_demo) + print("attributes", log_pattern_demo.get_attributes()) + + +if __name__ == "__main__": + test_parse() \ No newline at end of file diff --git a/gui/callbacks/test_telemetry.py b/gui/callbacks/test_telemetry.py new file mode 100644 index 0000000..c363f4e --- /dev/null +++ b/gui/callbacks/test_telemetry.py @@ -0,0 +1,24 @@ +import os + +from gui.file_manager import FileManager + +from logai.preprocess.telemetry_parser import Telemetry2Parser +import pandas as pd + +telemetry_parser = Telemetry2Parser() + +def create_summary_layout(data=pd.DataFrame()): + print(data["searchResult.Time"]) + latest = data.sort_values('searchResult.Time').iloc[-1] + +def test_parse(): + file_manager = FileManager() + filename = "telemetry2_0" + config_json = file_manager.load_config(filename) + #print(config_json, flush=True) + telemetry_parser.start_processing() + data = telemetry_parser.telemetry_report + create_summary_layout(data) + +if __name__ == "__main__": + test_parse() \ No newline at end of file diff --git a/gui/callbacks/utils.py b/gui/callbacks/utils.py index 795b067..6dc0322 100755 --- a/gui/callbacks/utils.py +++ b/gui/callbacks/utils.py @@ -6,27 +6,51 @@ # # import dash_bootstrap_components as dbc - +import dash from dash import html, Input, Output, State, callback, dash_table from gui.file_manager import FileManager +@callback( + Output("file-select", "options"), + Output("file-select", "value"), + [ + Input('restore-dropdown-value', 'children'), + Input("upload-data", "filename"), + Input("upload-data", "contents") + ], +) +def upload_file(_, uploaded_filenames, uploaded_file_contents): + options = [] + file_manager = FileManager() + ctx = dash.callback_context + + if ctx.triggered: + prop_id = ctx.triggered[0]["prop_id"].split(".")[0] + if prop_id == "upload-data": + if uploaded_filenames is not None and uploaded_file_contents is not None: + for name, data in zip(uploaded_filenames, uploaded_file_contents): + file_manager.save_file(name, data) + + file_manager.process_uploaded_files() + else: + pass + #print("Prop_id", prop_id, flush=True) + else: + pass + #print("UPload file else case", flush=True) -file_manager = FileManager() + files = file_manager.list_merged_files() + for filename in files: + options.append({"label": filename, "value": filename}) -@callback( - Output("upload-status", "children"), - [Input("upload-data", "filename"), Input("upload-data", "contents")], -) -def upload_file(uploaded_filenames, 
uploaded_file_contents): - if uploaded_filenames is not None and uploaded_file_contents is not None: - for name, data in zip(uploaded_filenames, uploaded_file_contents): - file_manager.save_file(name, data) - return html.Div("Upload Success!") + if len(options) > 0: + return options, options[0]["label"] else: - return html.Div("File Already Exists!") + return options, "" +""" @callback( Output("file-select", "options"), Output("file-select", "value"), @@ -67,3 +91,4 @@ def custom_file_setting(dataset_name): ) else: return html.Div() +""" diff --git a/gui/diagnose_broadband.py b/gui/diagnose_broadband.py new file mode 100644 index 0000000..f55ea9b --- /dev/null +++ b/gui/diagnose_broadband.py @@ -0,0 +1,62 @@ +import pandas as pd +from transformers import AutoTokenizer, AutoModelForCausalLM +import torch + +def load_selected_columns(column_file: str) -> list: + with open(column_file, "r") as f: + return [line.strip() for line in f if line.strip()] + +def run_diagnosis(model_dir: str, test_file: str, column_file: str = "column_list.txt"): + # === Load model === + tokenizer = AutoTokenizer.from_pretrained(model_dir) + model = AutoModelForCausalLM.from_pretrained(model_dir) + + # === Load required columns from file === + required_columns = load_selected_columns(column_file) + + # === Read Excel file and select only required columns === + df = pd.read_excel(test_file, engine="openpyxl") + df = df[[col for col in required_columns if col in df.columns]] + df = df.astype(str).apply(lambda x: x.str.strip()) + + + # ✅ Initialize results list here + results = [] + + # === Run inference === + for idx, row in df.iterrows(): + + instruction = "Diagnose broadband anomaly" + input_text = ( + f"Time: {row.get('Report.Time', 'NA')}, " + f"MAC: {row.get('Report.mac', 'NA')}, " + f"WiFi1: {row.get('Report.Device.WiFi.SSID.1.Status', 'NA')}, " + f"WiFi2: {row.get('Report.Device.WiFi.SSID.2.Status', 'NA')}, " + f"Radio1: {row.get('Report.Device.WiFi.Radio.1.Status', 'NA')}, " + f"Radio2: {row.get('Report.Device.WiFi.Radio.2.Status', 'NA')}, " + ) + prompt = f"### Instruction:\n{instruction}\n\n### Input:\n{input_text}\n\n### Response:\n" + inputs = tokenizer(prompt, return_tensors="pt").to("cpu") + + with torch.no_grad(): + outputs = model.generate( + **inputs, + max_new_tokens=100, + do_sample=True, + temperature=0.7, + top_k=50, + top_p=0.95, + pad_token_id=tokenizer.eos_token_id, + ) + + full_output = tokenizer.decode(outputs[0], skip_special_tokens=True) + response = full_output.split("### Response:")[-1].strip() + + results.append({ + "Time": row.get("Report.Time", "NA"), + "MAC": row.get("Report.mac", "NA"), + "Diagnosis": response + }) + + return results + diff --git a/gui/file_manager.py b/gui/file_manager.py index 4db4d61..d3ccc16 100644 --- a/gui/file_manager.py +++ b/gui/file_manager.py @@ -7,26 +7,72 @@ # import os import base64 +import shutil +import tarfile +import json +import re +from collections import defaultdict +from dataclasses import dataclass from dash import html from urllib.parse import quote as urlquote +from logai.utils.constants import UPLOAD_DIRECTORY, MERGED_LOGS_DIRECTORY + +from typing import List, Optional, Tuple, Dict, Any + +from logai.preprocess.telemetry_parser import Telemetry2Parser + +@dataclass +class ConfigEntry: + name: str + supported_config: str + supported_files: List[str] + +@dataclass +class ConfigIndex: + supported_files: List[ConfigEntry] + + @staticmethod + def load_from_file(index_path: str) -> 'ConfigIndex': + with open(index_path, 'r') as f: + raw_data = 
json.load(f) + entries = [ConfigEntry(**entry) for entry in raw_data.get("supported_files", [])] + return ConfigIndex(supported_files=entries) + + def find_config_for_file(self, filename: str) -> str: + filename_base = os.path.basename(filename) + + for entry in self.supported_files: + for supported_name in entry.supported_files: + if supported_name.lower() in filename_base.lower(): + return entry.supported_config + raise ValueError(f"No config found for file: {filename}") class FileManager: - def __init__(self, directory=None): - if directory is None: - self.directory = os.path.join( - os.path.dirname(os.path.abspath(__file__)), "uploaded_files" - ) - else: - self.directory = directory + """Processor for handling uploaded files in the application.""" + def __init__(self): + self.directory = UPLOAD_DIRECTORY + self.merged_logs_path = MERGED_LOGS_DIRECTORY + #self.base_directory = path if not os.path.exists(self.directory): os.makedirs(self.directory) + + if not os.path.exists(self.merged_logs_path): + os.makedirs(self.merged_logs_path) + # === Save uploaded file to local folder === + def save_file(self, name, content): + content_type, content_string = content.split(',') + decoded = base64.b64decode(content_string) + file_path = os.path.join(self.directory, name) + with open(file_path, "wb") as f: + f.write(decoded) + """ def save_file(self, name, content): data = content.encode("utf8").split(b";base64,")[1] with open(os.path.join(self.directory, name), "wb") as fp: fp.write(base64.decodebytes(data)) - + """ def uploaded_files(self): files = [] for filename in os.listdir(self.directory): @@ -38,7 +84,209 @@ def uploaded_files(self): def file_download_link(self, filename): location = "/download/{}".format(urlquote(filename)) return html.A(filename, href=location) + + def process_uploaded_files(self): + """Process uploaded files by extracting and merging logs.""" + if not os.path.exists(self.directory): + raise FileNotFoundError(f"Upload directory '{self.path}' does not exist.") + + # Create a temporary directory for extraction + temp_dir = os.path.join(self.directory, "temp") + os.makedirs(temp_dir, exist_ok=True) + + for file in os.listdir(self.directory): + filename = file.lower() + if filename.endswith(".tgz") or filename.endswith(".tar.gz"): + src_file = os.path.join(self.directory, file) + base = file.rsplit('.', 2)[0] + dest = os.path.join(temp_dir, base) + os.makedirs(dest, exist_ok=True) + with tarfile.open(src_file, "r:gz") as tar: + tar.extractall(path=dest) + + self._merge_files(temp_dir, output_dir=os.path.join(self.directory, "merged_logs")) + # remove temporary directory + shutil.rmtree(temp_dir) + + # Extract Telemetry Profiles + temp_telemetry_parser = Telemetry2Parser() + temp_telemetry_parser.extract_telemetry_reports() + """ + def _merge_files(self,temp_dir, output_dir="./merged_logs"): + os.makedirs(output_dir, exist_ok=True) + folder_path = temp_dir + folders = os.listdir(folder_path) + folders.sort() + + for dirname in folders: + content = os.path.join(folder_path, dirname) + dirs = os.listdir(content) + if len(dirs) == 1: + for inner_dirname in dirs: + in_path = os.path.join(content, inner_dirname) + else: + in_path = content + + for in_filename in os.listdir(in_path): + if ( + "2023" not in in_filename + and "2024" not in in_filename + and "2025" not in in_filename + ): + out_filename = in_filename + else: + out_filename = in_filename[19:] + if "024-" in out_filename: + out_filename = in_filename[37:] + if "1_" in out_filename: + out_filename = out_filename[2:] + if 
out_filename and out_filename[0] in "0123456789_": + out_filename = out_filename[1:] + + out_file = os.path.join(output_dir, out_filename) + in_file = os.path.join(in_path, in_filename) + + with open(in_file, "rb") as rd, open(out_file, "ab") as wr: + #message = "****Merging " + in_file + " **********\n" + #wr.write(message.encode("utf-8")) + shutil.copyfileobj(rd, wr) + """ + def _merge_files(self, temp_dir, output_dir="./merged_logs"): + os.makedirs(output_dir, exist_ok=True) + folder_path = temp_dir + folders = os.listdir(folder_path) + folders.sort() + + # ----------- Stage 1: Initial merge by normalized name ----------- + for dirname in folders: + content = os.path.join(folder_path, dirname) + dirs = os.listdir(content) + if len(dirs) == 1: + in_path = os.path.join(content, dirs[0]) + else: + in_path = content + + for in_filename in os.listdir(in_path): + if "2023" not in in_filename and "2024" not in in_filename and "2025" not in in_filename: + out_filename = in_filename + else: + out_filename = in_filename[19:] + if "024-" in out_filename: + out_filename = in_filename[37:] + if "1_" in out_filename: + out_filename = out_filename[2:] + if out_filename and out_filename[0] in "0123456789_": + out_filename = out_filename[1:] + + out_file = os.path.join(output_dir, out_filename) + in_file = os.path.join(in_path, in_filename) + + with open(in_file, "rb") as rd, open(out_file, "ab") as wr: + shutil.copyfileobj(rd, wr) + + # ----------- Stage 2: Merge rotated logs and delete originals ----------- + merged_files = os.listdir(output_dir) + base_map = defaultdict(list) + + for filename in merged_files: + match = re.match(r"(.+?)(?:\.(\d+))?$", filename) + if match: + base_name = match.group(1) + base_map[base_name].append(filename) + + for base_name, versions in base_map.items(): + if len(versions) <= 1: + continue + + def suffix_key(f): + m = re.search(r"\.(\d+)$", f) + return int(m.group(1)) if m else -1 + + versions.sort(key=suffix_key) + merged_path = os.path.join(output_dir, base_name + "_final_merged.log") + + with open(merged_path, "wb") as wr: + for version in versions: + full_path = os.path.join(output_dir, version) + with open(full_path, "rb") as rd: + shutil.copyfileobj(rd, wr) + os.remove(full_path) + + #print(f"Merged and cleaned: {versions} -> {merged_path}") + + # ----------- Stage 3: Sort _final_merged.log files by timestamp string ----------- + for filename in os.listdir(output_dir): + if filename.endswith("_final_merged.log"): + file_path = os.path.join(output_dir, filename) + try: + with open(file_path, "r", encoding="utf-8", errors="ignore") as f: + lines = f.readlines() + + def extract_ts_str(line: str): + parts = line.split() + return parts[0] if parts else "" + + lines.sort(key=extract_ts_str) + + with open(file_path, "w", encoding="utf-8") as f: + f.writelines(lines) + + #print(f"Sorted by timestamp string: {filename}") + except Exception as e: + print(f"Error sorting {filename}: {e}") + + for filename in os.listdir(output_dir): + if filename.endswith("_final_merged.log"): + file_path = os.path.join(output_dir, filename) + base_fname = os.path.basename(file_path) + remove_tag = base_fname.replace("_final_merged.log", "") + new_file_name = os.path.join(output_dir, remove_tag) + #print("file {} rename to {} \n".format(file_path, new_file_name)) + os.rename(file_path, new_file_name) + + def clean_temp_files(self): + for name in os.listdir(self.directory): + full_path = os.path.join(self.directory, name) + if os.path.isdir(full_path): + shutil.rmtree(full_path) + else: + 
os.remove(full_path) + #os.rmdir(input_dir) + #print(f"Cleaned all contents from {input_dir}") + + def list_uploaded_files(self): + """List all files saved in the uploads folder.""" + try: + return sorted(os.listdir(self.directory)) + except FileNotFoundError: + return [] + + def list_merged_files(self): + """List all files saved in the uploads folder.""" + try: + merged_logs_path = MERGED_LOGS_DIRECTORY + return sorted(os.listdir(merged_logs_path), reverse=True) + except FileNotFoundError: + return [] + + def load_config(self, filename): + + root_dir = os.path.dirname(os.path.abspath(__file__)) + config_list_path = os.path.join(root_dir, "../configs", "config_list.json") - @property - def base_directory(self): - return self.directory + if os.path.exists(config_list_path): + #print(f"Loading config from {config_list_path}") + self.config_index = ConfigIndex.load_from_file(config_list_path) + if self.config_index: + file_config = self.config_index.find_config_for_file(filename) + self.config_path = os.path.join(root_dir, "../configs", file_config) + #print("config {}, path {}".format(file_config, self.config_path)) + if os.path.exists(self.config_path): + try: + with open(self.config_path, 'r') as f: + raw_data = json.load(f) + return raw_data + except json.JSONDecodeError as e: + print(f"Error decoding invalid JSON: {e}\n") + except Exception as e: + print(f"An unexpected error occurred: {e}\n") \ No newline at end of file diff --git a/gui/pages/ai_analysis.py b/gui/pages/ai_analysis.py new file mode 100644 index 0000000..aa79fc3 --- /dev/null +++ b/gui/pages/ai_analysis.py @@ -0,0 +1,68 @@ +import dash_bootstrap_components as dbc +from dash import dcc, html + +from .utils import ( + create_modal, + create_description_card, + create_upload_file_layout, + create_file_setting_layout, + create_param_table, + create_run_button +) + + +"""def create_control_card(): + return html.Div( + id="control-card", + children=[ + create_upload_file_layout(), + create_file_setting_layout(), + create_summarization_algo_setting_layout(), + html.Hr(), + create_run_button("pattern-btn"), + create_modal( + modal_id="pattern_exception_modal", + header="An Exception Occurred", + content="An exception occurred. 
Please click OK to continue.", + content_id="pattern_exception_modal_content", + button_id="pattern_exception_modal_close", + ), + ], + )""" +def create_ai_analysis_layout(): + return dbc.Row( + [ + # Left column + dbc.Col( + html.Div( + [ + create_description_card(), + #create_control_card(), + html.Div( + ["initial child"], + id="output-clientside", + style={"display": "none"}, + ), + ] + ), + width=2, + ), + # Right column + dbc.Col( + html.Div( + [ + dbc.Row([ + dbc.Col([ + dbc.Button("Run AI Analysis", id="run-ai-script-btn", color="primary"), + html.Pre(id="ai-script-output", className="mt-3", style={ + "whiteSpace": "pre-wrap", + "maxHeight": "500px", + "overflowY": "auto" + }) + ], width=8) + ], justify="center") + ]) + ) +] +) +layout = create_ai_analysis_layout() diff --git a/gui/pages/anomaly_detection.py b/gui/pages/anomaly_detection.py index 07c3458..a1c5f40 100755 --- a/gui/pages/anomaly_detection.py +++ b/gui/pages/anomaly_detection.py @@ -11,8 +11,10 @@ from .utils import ( create_description_card, create_modal, + create_upload_file_layout, create_file_setting_layout, create_param_table, + create_run_button ) @@ -20,14 +22,11 @@ def create_control_card(): return html.Div( id="control-card", children=[ - # create_upload_file_layout(), + create_upload_file_layout(), create_file_setting_layout(), create_ad_algo_setting_layout(), html.Hr(), - html.Div( - children=[html.Button(id="anomaly-btn", children="Run", n_clicks=0)], - style={"textAlign": "center"}, - ), + create_run_button("anomaly-btn"), create_modal( modal_id="anomaly_exception_modal", header="An Exception Occurred", @@ -52,20 +51,6 @@ def create_ad_algo_setting_layout(): ), html.Div(id="ad-parsing-param-table", children=[create_param_table()]), html.Br(), - html.B("Vectorization Algortihm"), - dcc.Dropdown( - id="vectorization-algo-select", - options=["word2vec", "tfidf", "fasttext"], - value="word2vec", - ), - html.Br(), - html.B("Categorical Encoder"), - dcc.Dropdown( - id="categorical-encoder-select", - options=["label_encoder", "one_hot_encoder", "ordinal_encoder"], - value="label_encoder", - ), - html.Br(), html.B("Anomaly Detection Algortihm"), dcc.Dropdown( id="ad-algo-select", @@ -94,12 +79,20 @@ def create_display_layout(): dbc.CardBody( [ dcc.Loading( - id="loading-timechart", - children=[dbc.Row(dcc.Graph(id="time_chart"))], - type="default", + [ + #id="loading-timechart", + html.Div( + children=[ + dcc.Graph(id="time_chart") + ], + style={'overflowX': 'scroll', 'height': '400px', 'border': '1px solid lightgray'} + ) + #children=[dbc.Row(dcc.Graph(id="time_chart"))], + #type="default", + ] ) ], - style={"marginTop": 0, "marginBottom": 0}, + #style={"marginTop": 0, "marginBottom": 0}, ), ), html.B("Anomalies"), @@ -114,6 +107,7 @@ def create_display_layout(): ) ] ), + style={"maxwidth": "900px"}, ), ], ) diff --git a/gui/pages/clustering.py b/gui/pages/clustering.py index 409fe1a..4ea648e 100755 --- a/gui/pages/clustering.py +++ b/gui/pages/clustering.py @@ -11,8 +11,10 @@ from .utils import ( create_modal, create_description_card, + create_upload_file_layout, create_file_setting_layout, create_param_table, + create_cluster_run_button ) @@ -20,14 +22,11 @@ def create_control_card(): return html.Div( id="control-card", children=[ - # create_upload_file_layout(), + create_upload_file_layout(), create_file_setting_layout(), - create_clustering_algo_setting_layout(), + #create_clustering_algo_setting_layout(), html.Hr(), - html.Div( - children=[html.Button(id="clustering-btn", children="Run", n_clicks=0)], - 
style={"textAlign": "center"}, - ), + create_cluster_run_button("clustering-btn"), create_modal( modal_id="clustering_exception_modal", header="An Exception Occurred", @@ -47,7 +46,7 @@ def create_clustering_algo_setting_layout(): html.B("Parsing Algortihm"), dcc.Dropdown( id="parsing-algo-select", - options=["DRAIN", "IPLoM", "AEL"], + options=["DRAIN3","DRAIN", "IPLoM", "AEL"], value="DRAIN", ), html.Div( @@ -58,7 +57,7 @@ def create_clustering_algo_setting_layout(): dcc.Dropdown( id="vectorization-algo-select", options=["word2vec", "tfidf", "fasttext"], - value="word2vec", + value="tfidf", ), html.Br(), html.B("Categorical Encoder"), @@ -72,7 +71,7 @@ def create_clustering_algo_setting_layout(): dcc.Dropdown( id="clustering-algo-select", options=["DBSCAN", "kmeans"], - value="kmeans", + value="DBSCAN", ), html.Div(id="clustering-param-table", children=[create_param_table()]), ], @@ -99,7 +98,8 @@ def create_display_layout(): ) ], style={"marginTop": 0, "marginBottom": 0}, - ) + ), + style={"maxwidth": "900px"}, ), html.B("Loglines in Cluster"), html.Hr(), @@ -163,6 +163,37 @@ def create_clustering_layout(): ) ] ), + html.B("Total Charts"), + html.Hr(), + dbc.Row( + [ + dbc.Col( + dbc.Card( + dbc.CardBody( + [ + dcc.Loading( + [ + dcc.Graph(id="pattern-graph_total"), + ] + ) + ] + ) + ), + width=12, + ), + ], + ), + html.B("Cluster template"), + html.Hr(), + dbc.Card( + dbc.CardBody( + [ + html.Div( + id="cluster-tmp", + ) + ], + ), + ), ] ), ], diff --git a/gui/pages/pattern.py b/gui/pages/pattern.py index ccd8257..31bfcb9 100755 --- a/gui/pages/pattern.py +++ b/gui/pages/pattern.py @@ -11,8 +11,10 @@ from .utils import ( create_modal, create_description_card, + create_upload_file_layout, create_file_setting_layout, create_param_table, + create_run_button ) @@ -20,14 +22,11 @@ def create_control_card(): return html.Div( id="control-card", children=[ - # create_upload_file_layout(), + create_upload_file_layout(), create_file_setting_layout(), create_summarization_algo_setting_layout(), html.Hr(), - html.Div( - children=[html.Button(id="pattern-btn", children="Run", n_clicks=0)], - style={"textAlign": "center"}, - ), + create_run_button("pattern-btn"), create_modal( modal_id="pattern_exception_modal", header="An Exception Occurred", diff --git a/gui/pages/telemetry.py b/gui/pages/telemetry.py new file mode 100755 index 0000000..1487b89 --- /dev/null +++ b/gui/pages/telemetry.py @@ -0,0 +1,114 @@ +# +# Copyright (c) 2023 Salesforce.com, inc. +# All rights reserved. +# SPDX-License-Identifier: BSD-3-Clause +# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause +# +# +import dash_bootstrap_components as dbc +from dash import dcc, html + +from .utils import ( + create_modal, + create_description_card, + create_upload_file_layout, + create_run_button +) + + +def create_control_card(): + return html.Div( + id="control-card", + children=[ + create_upload_file_layout(), + #create_file_setting_layout(), + html.Hr(), + create_run_button("telemetry-btn"), + create_modal( + modal_id="telemetry_exception_modal", + header="An Exception Occurred", + content="An exception occurred. 
Please click OK to continue.", + content_id="telemetry_exception_modal_content", + button_id="telemetry_exception_modal_close", + ), + ], + ) + +def create_timeseries_grapy_layout(): + return html.Div( + children=[ + dcc.Graph(id="telemetry-time-series"), + ], + # style={ + # 'display': 'inline-block', + # 'width': '59%' + # }, + ) + + +def create_telemetry_layout(): + return dbc.Row( + [ + # Left column + dbc.Col( + html.Div( + [ + create_description_card(), + create_control_card(), + html.Div( + ["initial child"], + id="output-clientside", + style={"display": "none"}, + ), + ] + ), + width=2, + ), + # Right column + dbc.Col( + html.Div( + [ + html.H4("Telemetry Summarizaton"), + html.Hr(), + dbc.Row( + [ + dbc.Col(dbc.Card([ + dbc.CardHeader("Device Info"), + dbc.CardBody(id="dev-summary-card", children=html.Div("Click 'Run' to load summary.")) + ]), width=4), + dbc.Col(dbc.Card([ + dbc.CardHeader("Device Status"), + dbc.CardBody(id="dev-status-card", children=html.Div("Click 'Run' to load summary.")) + ]), width=4), + ], + ), + html.Hr(), + dbc.Row([ + dbc.Col(dbc.Card([ + dbc.CardHeader("Memory Split"), + dbc.CardBody(id="mem-chart-card", children=html.Div("Click 'Run' to load chart.")) + ]), width=6), + dbc.Col(dbc.Card([ + dbc.CardHeader("Network Stats"), + dbc.CardBody(id="network-stat-chart-card", children=html.Div("Click 'Run' to load chart.")) + ]), width=6), + ]), + html.Hr(), + dbc.Row([ + dbc.Col(dbc.Card([ + dbc.CardHeader("CPU Usage vs CPU Temp"), + dbc.CardBody(id="cpu-chart-card", children=html.Div("Click 'Run' to load chart.")) + ]), width=6), + dbc.Col(dbc.Card([ + dbc.CardHeader("Radio Stats"), + dbc.CardBody(id="radio-stat-chart-card", children=html.Div("Click 'Run' to load chart.")) + ]), width=6), + ]) + ] + ) + ), + ] + ) + + +layout = create_telemetry_layout() diff --git a/gui/pages/utils.py b/gui/pages/utils.py index 1ff3b98..1d92e1f 100755 --- a/gui/pages/utils.py +++ b/gui/pages/utils.py @@ -41,14 +41,64 @@ def emit(self, record): except Exception: self.handleError(record) +def create_run_button(button_id): + return dbc.Row( + [ + dbc.Col( + [ + html.Div( + children=[html.Button(id=button_id, children="Run", n_clicks=0)], + style={"textAlign": "center"}, + ), + ] + ), + ] + ) +def create_run_button_all(button_id): + button_id_all = button_id + "-all" + return dbc.Row( + [ + dbc.Col( + [ + html.Div( + children=[html.Button(id=button_id, children="Run-All", n_clicks=0)], + style={"textAlign": "center"}, + ), + ] + ), + ] + ) + +def create_cluster_run_button(button_id): + button_id_all = button_id + "-all" + return dbc.Row( + [ + dbc.Col( + [ + html.Div( + children=[html.Button(id=button_id, children="Run", n_clicks=0)], + style={"textAlign": "center"}, + ), + ] + ), + dbc.Col( + [ + html.Div( + children=[html.Button(id=button_id_all, children="Run-All", n_clicks=0)], + style={"textAlign": "center"}, + ), + ] + ), + ] + ) def create_banner(app): return html.Div( id="banner", className="banner", children=[ - html.Img(src=app.get_asset_url("logai_logo.jpg")), - html.Plaintext(" Powered by Salesforce AI Research"), + html.Img(src=app.get_asset_url("logai_logo.png"), style={'width': '10rem', 'height': '10rem'}), + html.Plaintext(" AI-Powered Fault Detection, Prediction and Auto-Healing for Home connectivity"), ], ) @@ -57,7 +107,7 @@ def create_description_card(): return html.Div( id="description-card", children=[ - html.H4("AI-based Log Analysis"), + html.H3("AI-based Log Analysis"), html.Div([create_menu()]), html.Div(id="intro", children=" "), ], @@ -70,6 +120,14 @@ 
def create_menu(): dbc.Row( dcc.Link( "Log Summarization", + href="/logai/telemetry", + className="tab first", + style={"font-weight": "bold", "text-decoration": "underline"}, + ) + ), + dbc.Row( + dcc.Link( + "Log Parsing", href="/logai/pattern", className="tab first", style={"font-weight": "bold", "text-decoration": "underline"}, @@ -91,6 +149,14 @@ def create_menu(): style={"font-weight": "bold", "text-decoration": "underline"}, ) ), + dbc.Row( + dcc.Link( + "AI Analysis", + href="/logai/ai_analysis", + className="tab fourth", + style={"font-weight": "bold", "text-decoration": "underline"}, + ) + ), ], ) return menu @@ -121,8 +187,8 @@ def create_upload_file_layout(): return html.Div( id="upload-file-layout", children=[ - html.Br(), - html.P("Upload Log File"), + #html.Br(), + html.B("Upload Log File"), dcc.Upload( id="upload-data", children=html.Div(["Drag and Drop or Select a File"]), @@ -141,7 +207,6 @@ def create_upload_file_layout(): ], ) - def create_file_setting_layout(): return html.Div( id="file-setting-layout", @@ -149,17 +214,14 @@ def create_file_setting_layout(): html.Br(), html.B("File Settings"), html.Hr(), - html.Label("Log Type"), - dcc.Dropdown( - id="log-type-select", - options=["HDFS", "BGL", "HealthApp", "Custom"], - value="HDFS", - ), dbc.Row(dbc.Col([html.Div(id="custom-file-setting")])), html.Label("Log File"), - dcc.Dropdown(id="file-select", style={"width": "100%"}), - html.Label("Attributes"), - dcc.Dropdown(id="attribute-name-options", multi=True), + dcc.Dropdown(id="file-select", + options=["No File Selected!"], + value="No File Selected", + style={"width": "100%"}), + #html.Label("Attributes"), + #dcc.Dropdown(id="attribute-name-options", multi=True), html.Label("Time Interval"), dcc.Slider( 0, diff --git a/img/LogAIDesign-2.png b/img/LogAIDesign-2.png deleted file mode 100644 index c9b6b8f..0000000 Binary files a/img/LogAIDesign-2.png and /dev/null differ diff --git a/img/RDK_One3B_LandingPage.png b/img/RDK_One3B_LandingPage.png new file mode 100644 index 0000000..20cfbaa Binary files /dev/null and b/img/RDK_One3B_LandingPage.png differ diff --git a/img/RDK_One3B_LogClustering.png b/img/RDK_One3B_LogClustering.png new file mode 100644 index 0000000..359b152 Binary files /dev/null and b/img/RDK_One3B_LogClustering.png differ diff --git a/img/RDK_One3B_LogParsing.png b/img/RDK_One3B_LogParsing.png new file mode 100644 index 0000000..f3d6e90 Binary files /dev/null and b/img/RDK_One3B_LogParsing.png differ diff --git a/img/RDK_One3B_LogSummarization.png b/img/RDK_One3B_LogSummarization.png new file mode 100644 index 0000000..5d7043f Binary files /dev/null and b/img/RDK_One3B_LogSummarization.png differ diff --git a/img/icon.jpg b/img/icon.jpg deleted file mode 100644 index 10f5232..0000000 Binary files a/img/icon.jpg and /dev/null differ diff --git a/img/log_summarization.png b/img/log_summarization.png deleted file mode 100644 index 0672d45..0000000 Binary files a/img/log_summarization.png and /dev/null differ diff --git a/img/logai-design-principle-2.png b/img/logai-design-principle-2.png deleted file mode 100644 index fc328e0..0000000 Binary files a/img/logai-design-principle-2.png and /dev/null differ diff --git a/img/logai_anomaly_detection.png b/img/logai_anomaly_detection.png deleted file mode 100644 index 90cb964..0000000 Binary files a/img/logai_anomaly_detection.png and /dev/null differ diff --git a/img/logai_clustering_algorithm.png b/img/logai_clustering_algorithm.png deleted file mode 100644 index d6267ea..0000000 Binary files 
a/img/logai_clustering_algorithm.png and /dev/null differ diff --git a/img/logai_clustering_res.png b/img/logai_clustering_res.png deleted file mode 100644 index fb082cd..0000000 Binary files a/img/logai_clustering_res.png and /dev/null differ diff --git a/img/logai_file_setting.png b/img/logai_file_setting.png deleted file mode 100644 index 4906965..0000000 Binary files a/img/logai_file_setting.png and /dev/null differ diff --git a/img/logai_gui_landing.png b/img/logai_gui_landing.png deleted file mode 100644 index 6cd5353..0000000 Binary files a/img/logai_gui_landing.png and /dev/null differ diff --git a/img/logai_logo.jpg b/img/logai_logo.jpg deleted file mode 100644 index bbaaac4..0000000 Binary files a/img/logai_logo.jpg and /dev/null differ diff --git a/img/logai_logo.png b/img/logai_logo.png new file mode 100644 index 0000000..7326b12 Binary files /dev/null and b/img/logai_logo.png differ diff --git a/img/logai_parsing_algo.png b/img/logai_parsing_algo.png deleted file mode 100644 index e0908b2..0000000 Binary files a/img/logai_parsing_algo.png and /dev/null differ diff --git a/img/logai_summarization_res.png b/img/logai_summarization_res.png deleted file mode 100644 index 2da456b..0000000 Binary files a/img/logai_summarization_res.png and /dev/null differ diff --git a/logai/algorithms/anomaly_detection_algo/dbl.py b/logai/algorithms/anomaly_detection_algo/dbl.py index 927ed57..d054bed 100644 --- a/logai/algorithms/anomaly_detection_algo/dbl.py +++ b/logai/algorithms/anomaly_detection_algo/dbl.py @@ -8,7 +8,7 @@ from datetime import datetime import pandas as pd -from attr import dataclass +from dataclasses import dataclass, field from typing import Tuple, List from merlion.models.anomaly.dbl import DynamicBaseline, DynamicBaselineConfig @@ -40,7 +40,7 @@ class DBLDetectorParams(Config): train_window: str = None wind_sz: str = "1h" trends: List[str] = None - kwargs: dict = {} + kwargs: dict = field(default_factory=dict) @factory.register("detection", "dbl", DBLDetectorParams) diff --git a/logai/algorithms/anomaly_detection_algo/distribution_divergence.py b/logai/algorithms/anomaly_detection_algo/distribution_divergence.py index d0394a3..86a7339 100755 --- a/logai/algorithms/anomaly_detection_algo/distribution_divergence.py +++ b/logai/algorithms/anomaly_detection_algo/distribution_divergence.py @@ -7,7 +7,7 @@ # import numpy as np import pandas as pd -from attr import dataclass +from dataclasses import dataclass, field from logai.algorithms.algo_interfaces import AnomalyDetectionAlgo from logai.config_interfaces import Config @@ -40,7 +40,7 @@ class DistributionDivergenceParams(Config): ("JS"). It also allows a comma separated list of metrics like ("KL,JS" or "JS,KL"). 
""" n_bins: int = 100 - type: list = ["KL"] # "KL", "JS", "KL,JS" + type: list = field(default_factory="[KL]") #["KL"] # "KL", "JS", "KL,JS" @factory.register("detection", "distribution_divergence", DistributionDivergenceParams) diff --git a/logai/algorithms/anomaly_detection_algo/ets.py b/logai/algorithms/anomaly_detection_algo/ets.py index a7fc76b..986e82d 100644 --- a/logai/algorithms/anomaly_detection_algo/ets.py +++ b/logai/algorithms/anomaly_detection_algo/ets.py @@ -6,7 +6,7 @@ # # import pandas as pd -from attr import dataclass +from dataclasses import dataclass, field from datetime import datetime from logai.algorithms.algo_interfaces import AnomalyDetectionAlgo @@ -49,7 +49,7 @@ class ETSDetectorParams(Config): seasonal: str = "add" seasonal_periods: str = None refit: bool = True - kwargs: dict = {} + kwargs: dict = field(default_factory=dict) @factory.register("detection", "ets", ETSDetectorParams) diff --git a/logai/algorithms/anomaly_detection_algo/isolation_forest.py b/logai/algorithms/anomaly_detection_algo/isolation_forest.py index 4ace261..e6de3e5 100755 --- a/logai/algorithms/anomaly_detection_algo/isolation_forest.py +++ b/logai/algorithms/anomaly_detection_algo/isolation_forest.py @@ -6,7 +6,7 @@ # # import pandas as pd -from attr import dataclass +from dataclasses import dataclass from sklearn.ensemble import IsolationForest diff --git a/logai/algorithms/anomaly_detection_algo/local_outlier_factor.py b/logai/algorithms/anomaly_detection_algo/local_outlier_factor.py index e85fd62..51f2451 100755 --- a/logai/algorithms/anomaly_detection_algo/local_outlier_factor.py +++ b/logai/algorithms/anomaly_detection_algo/local_outlier_factor.py @@ -6,7 +6,7 @@ # # import pandas as pd -from attr import dataclass +from dataclasses import dataclass from sklearn.neighbors import LocalOutlierFactor import numpy as np diff --git a/logai/algorithms/anomaly_detection_algo/one_class_svm.py b/logai/algorithms/anomaly_detection_algo/one_class_svm.py index ff06272..e94a086 100755 --- a/logai/algorithms/anomaly_detection_algo/one_class_svm.py +++ b/logai/algorithms/anomaly_detection_algo/one_class_svm.py @@ -6,7 +6,7 @@ # # import pandas as pd -from attr import dataclass +from dataclasses import dataclass from sklearn.svm import OneClassSVM from logai.algorithms.algo_interfaces import AnomalyDetectionAlgo diff --git a/logai/algorithms/categorical_encoding_algo/ordinal_encoding.py b/logai/algorithms/categorical_encoding_algo/ordinal_encoding.py index 1b6de81..1eaa479 100755 --- a/logai/algorithms/categorical_encoding_algo/ordinal_encoding.py +++ b/logai/algorithms/categorical_encoding_algo/ordinal_encoding.py @@ -7,7 +7,7 @@ # import numpy as np import pandas as pd -from attr import dataclass +from dataclasses import dataclass from sklearn.preprocessing import OrdinalEncoder from logai.algorithms.algo_interfaces import CategoricalEncodingAlgo diff --git a/logai/algorithms/clustering_algo/birch.py b/logai/algorithms/clustering_algo/birch.py index 88ca30d..7fc5169 100755 --- a/logai/algorithms/clustering_algo/birch.py +++ b/logai/algorithms/clustering_algo/birch.py @@ -7,7 +7,7 @@ # import numpy as np import pandas as pd -from attr import dataclass +from dataclasses import dataclass from sklearn.cluster import Birch from logai.algorithms.algo_interfaces import ClusteringAlgo diff --git a/logai/algorithms/clustering_algo/dbscan.py b/logai/algorithms/clustering_algo/dbscan.py index 8b56a99..ac977af 100755 --- a/logai/algorithms/clustering_algo/dbscan.py +++ 
b/logai/algorithms/clustering_algo/dbscan.py @@ -6,7 +6,7 @@ # # import pandas as pd -from attr import dataclass +from dataclasses import dataclass from sklearn.cluster import DBSCAN from logai.algorithms.algo_interfaces import ClusteringAlgo diff --git a/logai/algorithms/clustering_algo/kmeans.py b/logai/algorithms/clustering_algo/kmeans.py index e7c9704..6381b2f 100755 --- a/logai/algorithms/clustering_algo/kmeans.py +++ b/logai/algorithms/clustering_algo/kmeans.py @@ -6,7 +6,7 @@ # # import pandas as pd -from attr import dataclass +from dataclasses import dataclass from sklearn.cluster import KMeans from logai.algorithms.algo_interfaces import ClusteringAlgo diff --git a/logai/algorithms/nn_model/forecast_nn/base_nn.py b/logai/algorithms/nn_model/forecast_nn/base_nn.py index 5452341..e6d76f2 100755 --- a/logai/algorithms/nn_model/forecast_nn/base_nn.py +++ b/logai/algorithms/nn_model/forecast_nn/base_nn.py @@ -17,7 +17,7 @@ from .utils import set_device, tensor2flatten_arr from logai.config_interfaces import Config from logai.utils.file_utils import read_file -from attr import dataclass +from dataclasses import dataclass from logai.algorithms.vectorization_algo.forecast_nn import ForecastNNVectorizedDataset from torch.utils.data import DataLoader diff --git a/logai/algorithms/nn_model/forecast_nn/cnn.py b/logai/algorithms/nn_model/forecast_nn/cnn.py index 87c666c..cc46d8b 100755 --- a/logai/algorithms/nn_model/forecast_nn/cnn.py +++ b/logai/algorithms/nn_model/forecast_nn/cnn.py @@ -14,7 +14,7 @@ ForecastBasedNN, ForecastBasedNNParams, ) -from attr import dataclass +from dataclasses import dataclass, field @dataclass @@ -24,7 +24,7 @@ class CNNParams(ForecastBasedNNParams): :param kernel_sizes: the kernel size (default value: list = [2, 3, 4]). """ - kernel_sizes: list = [2, 3, 4] + kernel_sizes: list = field(default_factory=lambda: [2, 3, 4]) class CNN(ForecastBasedNN): diff --git a/logai/algorithms/nn_model/forecast_nn/lstm.py b/logai/algorithms/nn_model/forecast_nn/lstm.py index c882eeb..1398a67 100755 --- a/logai/algorithms/nn_model/forecast_nn/lstm.py +++ b/logai/algorithms/nn_model/forecast_nn/lstm.py @@ -13,7 +13,7 @@ ForecastBasedNN, ) from logai.algorithms.vectorization_algo.forecast_nn import ForecastNNVectorizedDataset -from attr import dataclass +from dataclasses import dataclass @dataclass diff --git a/logai/algorithms/nn_model/forecast_nn/transformer.py b/logai/algorithms/nn_model/forecast_nn/transformer.py index 13285f0..602c34d 100755 --- a/logai/algorithms/nn_model/forecast_nn/transformer.py +++ b/logai/algorithms/nn_model/forecast_nn/transformer.py @@ -10,7 +10,7 @@ ForecastBasedNN, ForecastBasedNNParams, ) -from attr import dataclass +from dataclasses import dataclass from logai.algorithms.vectorization_algo.forecast_nn import ForecastNNVectorizedDataset diff --git a/logai/algorithms/nn_model/logbert/configs.py b/logai/algorithms/nn_model/logbert/configs.py index b5f5736..daee1bd 100755 --- a/logai/algorithms/nn_model/logbert/configs.py +++ b/logai/algorithms/nn_model/logbert/configs.py @@ -5,7 +5,7 @@ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause # # -from attr import dataclass +from dataclasses import dataclass from logai.config_interfaces import Config diff --git a/logai/algorithms/nn_model/transformers.py b/logai/algorithms/nn_model/transformers.py index eba2da9..2d034b4 100755 --- a/logai/algorithms/nn_model/transformers.py +++ b/logai/algorithms/nn_model/transformers.py @@ -9,7 +9,7 @@ import 
pandas as pd import torch from typing import Tuple, Dict -from attr import dataclass +from dataclasses import dataclass from datasets import Dataset, load_metric from sklearn.model_selection import train_test_split from torch.optim import AdamW diff --git a/logai/algorithms/parsing_algo/ael.py b/logai/algorithms/parsing_algo/ael.py index abded7a..0a9ad10 100755 --- a/logai/algorithms/parsing_algo/ael.py +++ b/logai/algorithms/parsing_algo/ael.py @@ -15,7 +15,7 @@ import pandas as pd from collections import defaultdict from functools import reduce -from attr import dataclass +from dataclasses import dataclass from logai.algorithms.algo_interfaces import ParsingAlgo from logai.config_interfaces import Config diff --git a/logai/algorithms/parsing_algo/drain-3.py b/logai/algorithms/parsing_algo/drain-3.py new file mode 100644 index 0000000..39b8454 --- /dev/null +++ b/logai/algorithms/parsing_algo/drain-3.py @@ -0,0 +1,91 @@ +import os +import csv +import re +import sys +from drain3 import TemplateMiner +from drain3.template_miner_config import TemplateMinerConfig + +from typing import List, Dict + +import pandas as pd +from dataclasses import dataclass + +from abc import ABC, abstractmethod + +from logai.algorithms.algo_interfaces import ParsingAlgo +from logai.config_interfaces import Config +from logai.algorithms.factory import factory + + +@dataclass +class Drain3Params(Config): + """Parameters for Drain Log Parser. + For more details on parameters see + https://github.com/logpai/Drain3/blob/master/drain3/drain.py. + + :param depth: The depth of tree. + :param sim_th: The similarity threshold. + :param max_children: The max number of children nodes. + :param max_clusters: The max number of clusters. + :param extra_delimiters: Extra delimiters. + :param param_str: The wildcard parameter string. + """ + depth: int = 3 + sim_th: float = 0.4 + max_children: int = 100 + max_clusters: int = None + extra_delimiters: tuple = () + param_str: str = "*" + + @classmethod + def from_dict(cls, config_dict): + config = super(Drain3Params, cls).from_dict(config_dict) + if config.extra_delimiters: + config.extra_delimiters = tuple(config.extra_delimiters) + return config + +@factory.register("parsing", "drain", Drain3Params) +class Drain_3(ParsingAlgo): + def __init__(self): + super().__init__() + + def parse(self, logline: pd.Series) -> pd.Series: + """Parse method to run log parser on a given log data. + + :param logline: The raw log data to be parsed. + :returns: The parsed log data. 
+ """ + self.fit(logline) + parsed_logline = [] + for line in logline: + parsed_logline.append(" ".join(self.match(line).log_template_tokens)) + return pd.Series(parsed_logline, index=logline.index) + + def parse(self, logline: pd.Series) -> pd.Series: + drain3_config = TemplateMinerConfig() + drain3_config.load("./drain3.ini") + drain3_config.profiling_enabled = False + processed = [] + template_miner = TemplateMinerConfig(config = drain3_config) + + """ + with open (input_log_file, output_csv_file) as f: + for line in f: + line = line.strip() + if not line: + continue + + result = template_miner.add_log_message(line) + if result: + processed.append({ + "cluster_id": result["cluster_id"], + "template" : result["template_mined"], + "log" : line + }) + with open(output_csv_file,'w',newline='') as cf: + writer = csv.DictWriter(cf,fieldnames=["cluster_id","template","log"]) + writer.writeheader() + for row in processed: + writer.writerow(row) + """ + diff --git a/logai/algorithms/parsing_algo/drain3.ini b/logai/algorithms/parsing_algo/drain3.ini new file mode 100644 index 0000000..2915960 --- /dev/null +++ b/logai/algorithms/parsing_algo/drain3.ini @@ -0,0 +1,30 @@ +[SNAPSHOT] +snapshot_interval_minutes = 10 +compress_state = True + +[MASKING] +masking = [ + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(([0-9a-f]{2,}:){3,}([0-9a-f]{2,}))((?=[^A-Za-z0-9])|$)", "mask_with": "ID"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3})((?=[^A-Za-z0-9])|$)", "mask_with": "IP"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9a-f]{6,} ?){3,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([0-9A-F]{4} ?){4,}((?=[^A-Za-z0-9])|$)", "mask_with": "SEQ"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)(0x[a-f0-9A-F]+)((?=[^A-Za-z0-9])|$)", "mask_with": "HEX"}, + {"regex_pattern":"((?<=[^A-Za-z0-9])|^)([\\-\\+]?\\d+)((?=[^A-Za-z0-9])|$)", "mask_with": "NUM"}, + {"regex_pattern":"(?<=executed cmd )(\".+?\")", "mask_with": "CMD"} + ] +mask_prefix = <: +mask_suffix = :> + +[DRAIN] +# engine is Optional parameter. Engine will be "Drain" if the engine argument is not specified. +# engine has two options: 'Drain' and 'JaccardDrain'. 
+# engine = Drain +sim_th = 0.4 +depth = 4 +max_children = 100 +max_clusters = 1024 +extra_delimiters = ["_"] + +[PROFILING] +enabled = True +report_sec = 30 \ No newline at end of file diff --git a/logai/algorithms/parsing_algo/iplom.py b/logai/algorithms/parsing_algo/iplom.py index 5082e52..7ea68dc 100755 --- a/logai/algorithms/parsing_algo/iplom.py +++ b/logai/algorithms/parsing_algo/iplom.py @@ -5,7 +5,7 @@ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause # # -from attr import dataclass +from dataclasses import dataclass from logai.algorithms.algo_interfaces import ParsingAlgo from logai.config_interfaces import Config diff --git a/logai/algorithms/vectorization_algo/fasttext.py b/logai/algorithms/vectorization_algo/fasttext.py index a80b773..6cebe65 100755 --- a/logai/algorithms/vectorization_algo/fasttext.py +++ b/logai/algorithms/vectorization_algo/fasttext.py @@ -8,7 +8,7 @@ import gensim import numpy as np import pandas as pd -from attr import dataclass +from dataclasses import dataclass from nltk.tokenize import word_tokenize from logai.algorithms.algo_interfaces import VectorizationAlgo diff --git a/logai/algorithms/vectorization_algo/forecast_nn.py b/logai/algorithms/vectorization_algo/forecast_nn.py index 4906c5a..96d5b10 100644 --- a/logai/algorithms/vectorization_algo/forecast_nn.py +++ b/logai/algorithms/vectorization_algo/forecast_nn.py @@ -10,7 +10,7 @@ import numpy as np import pickle as pkl import pandas as pd -from attr import dataclass +from dataclasses import dataclass from logai.algorithms.algo_interfaces import VectorizationAlgo from .semantic import Semantic, SemanticVectorizerParams diff --git a/logai/algorithms/vectorization_algo/logbert.py b/logai/algorithms/vectorization_algo/logbert.py index 555d531..46d92bb 100755 --- a/logai/algorithms/vectorization_algo/logbert.py +++ b/logai/algorithms/vectorization_algo/logbert.py @@ -7,7 +7,7 @@ # import os import pandas as pd -from attr import dataclass +from dataclasses import dataclass from tokenizers import ( decoders, models, diff --git a/logai/algorithms/vectorization_algo/semantic.py b/logai/algorithms/vectorization_algo/semantic.py index fdf4539..d53d8ba 100644 --- a/logai/algorithms/vectorization_algo/semantic.py +++ b/logai/algorithms/vectorization_algo/semantic.py @@ -13,7 +13,7 @@ import os import gensim.downloader import logging -from attr import dataclass +from dataclasses import dataclass from nltk.tokenize import word_tokenize from logai.algorithms.algo_interfaces import VectorizationAlgo diff --git a/logai/algorithms/vectorization_algo/sequential.py b/logai/algorithms/vectorization_algo/sequential.py index 3fe69bf..6027e31 100755 --- a/logai/algorithms/vectorization_algo/sequential.py +++ b/logai/algorithms/vectorization_algo/sequential.py @@ -6,7 +6,7 @@ # # import pandas as pd -from attr import dataclass +from dataclasses import dataclass import pickle as pkl import os diff --git a/logai/algorithms/vectorization_algo/tfidf.py b/logai/algorithms/vectorization_algo/tfidf.py index e76252b..1a01202 100755 --- a/logai/algorithms/vectorization_algo/tfidf.py +++ b/logai/algorithms/vectorization_algo/tfidf.py @@ -7,7 +7,7 @@ # import numpy as np import pandas as pd -from attr import dataclass +from dataclasses import dataclass from sklearn.feature_extraction.text import TfidfVectorizer from logai.algorithms.algo_interfaces import VectorizationAlgo diff --git a/logai/algorithms/vectorization_algo/word2vec.py 
b/logai/algorithms/vectorization_algo/word2vec.py index 94c1699..446f2b7 100755 --- a/logai/algorithms/vectorization_algo/word2vec.py +++ b/logai/algorithms/vectorization_algo/word2vec.py @@ -8,7 +8,7 @@ import gensim import numpy as np import pandas as pd -from attr import dataclass +from dataclasses import dataclass from nltk.tokenize import word_tokenize from logai.algorithms.algo_interfaces import VectorizationAlgo diff --git a/logai/analysis/anomaly_detector.py b/logai/analysis/anomaly_detector.py index 3d5ac77..fb26cd0 100755 --- a/logai/analysis/anomaly_detector.py +++ b/logai/analysis/anomaly_detector.py @@ -6,9 +6,9 @@ # # import pandas as pd -import logai.algorithms.anomaly_detection_algo +#import logai.algorithms.anomaly_detection_algo -from attr import dataclass +from dataclasses import dataclass from logai.config_interfaces import Config from logai.algorithms.factory import factory diff --git a/logai/analysis/clustering.py b/logai/analysis/clustering.py index 3f5300c..1e6d5d7 100755 --- a/logai/analysis/clustering.py +++ b/logai/analysis/clustering.py @@ -6,7 +6,7 @@ # # import pandas as pd -from attr import dataclass +from dataclasses import dataclass import logai.algorithms.clustering_algo from logai.config_interfaces import Config diff --git a/logai/applications/application_interfaces.py b/logai/applications/application_interfaces.py index 606d12f..749e3e9 100755 --- a/logai/applications/application_interfaces.py +++ b/logai/applications/application_interfaces.py @@ -5,7 +5,7 @@ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause # # -from attr import dataclass +from dataclasses import dataclass from logai.analysis.anomaly_detector import AnomalyDetectionConfig from logai.analysis.nn_anomaly_detector import NNAnomalyDetectionConfig diff --git a/logai/dataloader/data_loader.py b/logai/dataloader/data_loader.py index 87465a7..ea663bb 100755 --- a/logai/dataloader/data_loader.py +++ b/logai/dataloader/data_loader.py @@ -9,7 +9,7 @@ import re import pandas as pd -from attr import dataclass +from dataclasses import dataclass, field from logai.config_interfaces import Config from logai.dataloader.data_model import LogRecordObject @@ -23,11 +23,11 @@ class DataLoaderConfig(Config): """ filepath: str = "" - log_type: str = "csv" - dimensions: dict = dict() - reader_args: dict = dict() + log_type: str = "txt" + dimensions: dict = field(default_factory=dict) + reader_args: dict = field(default_factory=dict) infer_datetime: bool = False - datetime_format: str = "%Y-%M-%dT%H:%M:%SZ" # Default the ISO 8601 format example 2022-05-26T21:29:09+00:00 + datetime_format: str = "%Y-%m-%dT%H:%M:%S" # Default the ISO 8601 format example 2022-05-26T21:29:09+00:00 open_dataset: str = None @@ -149,11 +149,13 @@ def _create_log_record_object(self, df: pd.DataFrame): ) selected.columns = [constants.LOG_TIMESTAMPS] if self.config.infer_datetime and self.config.datetime_format: - datetime_format = self.config.datetime_format + datetime_format = self.config.datetime_format selected[constants.LOG_TIMESTAMPS] = pd.to_datetime( selected[constants.LOG_TIMESTAMPS], format=datetime_format, + errors="coerce" ) + selected = selected[selected[constants.LOG_TIMESTAMPS].notna()] setattr(log_record, field, selected) # log_record.__post_init__() diff --git a/logai/dataloader/data_model.py b/logai/dataloader/data_model.py index 4be68d2..8fae178 100755 --- a/logai/dataloader/data_model.py +++ b/logai/dataloader/data_model.py @@ -34,15 +34,15 @@ class 
LogRecordObject: :param _index: The indices of the log data. """ - timestamp: pd.DataFrame = pd.DataFrame() - attributes: pd.DataFrame = pd.DataFrame() - resource: pd.DataFrame = pd.DataFrame() - trace_id: pd.DataFrame = pd.DataFrame() - span_id: pd.DataFrame = pd.DataFrame() - severity_text: pd.DataFrame = pd.DataFrame() - severity_number: pd.DataFrame = pd.DataFrame() - body: pd.DataFrame = pd.DataFrame() - labels: pd.DataFrame = pd.DataFrame() + timestamp: pd.DataFrame = field(default_factory=lambda: pd.DataFrame()) + attributes: pd.DataFrame = field(default_factory=lambda: pd.DataFrame()) + resource: pd.DataFrame = field(default_factory=lambda: pd.DataFrame()) + trace_id: pd.DataFrame = field(default_factory=lambda: pd.DataFrame()) + span_id: pd.DataFrame = field(default_factory=lambda: pd.DataFrame()) + severity_text: pd.DataFrame = field(default_factory=lambda: pd.DataFrame()) + severity_number: pd.DataFrame = field(default_factory=lambda: pd.DataFrame()) + body: pd.DataFrame = field(default_factory=lambda: pd.DataFrame()) + labels: pd.DataFrame = field(default_factory=lambda: pd.DataFrame()) _index: np.array = field(init=False) def __post_init__(self): diff --git a/logai/dataloader/openset_data_loader.py b/logai/dataloader/openset_data_loader.py index 0e42a25..8e4d70e 100755 --- a/logai/dataloader/openset_data_loader.py +++ b/logai/dataloader/openset_data_loader.py @@ -8,7 +8,7 @@ import json import os.path -from attr import dataclass +from dataclasses import dataclass from logai.config_interfaces import Config from logai.dataloader.data_loader import FileDataLoader, DataLoaderConfig diff --git a/logai/information_extraction/categorical_encoder.py b/logai/information_extraction/categorical_encoder.py index 00cddb6..6316ea5 100755 --- a/logai/information_extraction/categorical_encoder.py +++ b/logai/information_extraction/categorical_encoder.py @@ -7,7 +7,7 @@ # import pandas as pd -from attr import dataclass +from dataclasses import dataclass from logai.algorithms.categorical_encoding_algo.label_encoding import LabelEncoding from logai.algorithms.categorical_encoding_algo.one_hot_encoding import ( diff --git a/logai/information_extraction/log_vectorizer.py b/logai/information_extraction/log_vectorizer.py index 6823604..5d3260d 100755 --- a/logai/information_extraction/log_vectorizer.py +++ b/logai/information_extraction/log_vectorizer.py @@ -6,7 +6,7 @@ # # import pandas as pd -from attr import dataclass +from dataclasses import dataclass import logai.algorithms.vectorization_algo from logai.config_interfaces import Config diff --git a/logai/preprocess/openset_partitioner.py b/logai/preprocess/openset_partitioner.py index a28b2e7..2d33db7 100644 --- a/logai/preprocess/openset_partitioner.py +++ b/logai/preprocess/openset_partitioner.py @@ -14,7 +14,7 @@ from logai.utils import constants from logai.config_interfaces import Config -from attr import dataclass +from dataclasses import dataclass @dataclass diff --git a/logai/preprocess/partitioner.py b/logai/preprocess/partitioner.py index 830a0b8..2250197 100755 --- a/logai/preprocess/partitioner.py +++ b/logai/preprocess/partitioner.py @@ -7,7 +7,7 @@ # import pandas as pd -from attr import dataclass +from dataclasses import dataclass from logai.config_interfaces import Config from logai.utils import constants diff --git a/logai/preprocess/preprocessor.py b/logai/preprocess/preprocessor.py index a8cda3f..0c41dee 100755 --- a/logai/preprocess/preprocessor.py +++ b/logai/preprocess/preprocessor.py @@ -7,7 +7,7 @@ # import numpy as np 
import pandas as pd -from attr import dataclass +from dataclasses import dataclass from logai.config_interfaces import Config from logai.dataloader.data_model import LogRecordObject diff --git a/logai/preprocess/telemetry_parser.py b/logai/preprocess/telemetry_parser.py new file mode 100644 index 0000000..344a365 --- /dev/null +++ b/logai/preprocess/telemetry_parser.py @@ -0,0 +1,215 @@ +import re +import os +import json +import glob +from logai.utils.constants import TELEMETRY_PROFILES, MERGED_LOGS_DIRECTORY +from logai.utils import json_helper +import pandas as pd +from enum import Enum + +class DML(str, Enum): + TIME = ".Time" + + MEM_AVAILABLE = ".meminfoavailable_split" + MEM_CACHED = ".cachedMem_split" + MEM_FREE = ".flash_usage_nvram_free_split" + + # CPU + CPU_TEMP = ".cpu_temp_split" + CPU_USAGE = ".CPUUsage" + + # Device Info + MAC_ADDRESS = ".mac" + VER = ".Version" + PROD_CLS = ".ProductClass" + + SERIAL_NUMBER = ".SerialNumber" + SW_VERSION = ".Version" + HW_VERSION = ".hardwareversion" + MODEL_NAME = ".ModelName" + MANUFACTURER = ".manufacturer" + EROUTER = ".erouterIpv4" + + # device status + WAN_MODE = ".wan_access_mode_split" + RADIO1_EN = ".wifi_radio_1_enable" + RADIO2_EN = ".wifi_radio_2_enable" + AP1_EN = ".wifi_accesspoint_1_status" + AP2_EN = ".wifi_accesspoint_2_status" + SSID1 = ".wifi_ssid_1_ssid" + SSID2 = ".wifi_ssid_2_ssid" + AIETIES_EDGE = ".airties_edge_enable" + + # WAN Sts + WAN_BYTES_RCVD = ".wan_bytesReceived" + WAN_BYTES_SENT = ".wan_bytesSent" + WAN_PKT_RCVD = ".wan_packetsReceived" + WAN_PKT_SENT = ".wan_packetsSent" + + # SSID Stats + SSID1_PKT_SENT = ".wifi_ssid_1_stats_packetssent" + SSID1_PKT_RCVD = ".wifi_ssid_1_stats_packetsreceived" + SSID1_BYTE_SENT = ".wifi_ssid_1_stats_bytessent" + SSID1_BYTE_RCVD = ".wifi_ssid_1_stats_bytesreceived" + SSID1_ERROR_SENT = ".wifi_ssid_1_stats_errorssent" + SSID1_ERROR_RCVD = ".wifi_ssid_1_stats_errorsreceived" + +class Telemetry2Parser: + """ + Implementation of file data loader, reading log record objects from local files. 
+ """ + + def __init__(self): + self.log_prefix_pattern = re.compile(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2} [^ ]+ T\d\.\w+ \[tid=\d+\] ?", re.MULTILINE) + self.filename = "telemetry2_0" + self.file_path = None + self.telemetry_report = pd.DataFrame() + self.telemetry_path = TELEMETRY_PROFILES + + if not os.path.exists(self.telemetry_path): + os.makedirs(self.telemetry_path, exist_ok=True) + + def _check_telemetry_file(self): + for fname in os.listdir(MERGED_LOGS_DIRECTORY): + if self.filename in fname: + self.file_path = os.path.join(MERGED_LOGS_DIRECTORY, fname) + #print("fle path ", self.file_path) + + if self.file_path is not None and os.path.exists(self.file_path): + return True + else: + return False + + def extract_telemetry_reports(self): + + if not self._check_telemetry_file(): + #print("return") + return + + inside_json = False + open_braces = 0 + json_buffer = "" + json_blocks = [] + + with open(self.file_path, "r") as infile: + for line in infile: + # Remove log prefixes from line (works anywhere in the line) + clean_line = self.log_prefix_pattern.sub("", line) + + # For every collected line, strip newlines and spaces immediately: + stripped_line = clean_line.strip().replace('\n', '').replace('\r', '') + if not inside_json: + brace_pos = stripped_line.find("{") + if brace_pos != -1: + inside_json = True + json_buffer = stripped_line[brace_pos:] + open_braces = json_buffer.count("{") - json_buffer.count("}") + if open_braces == 0: + json_blocks.append(json_buffer) + inside_json = False + json_buffer = "" + else: + json_buffer += stripped_line + open_braces += stripped_line.count("{") - stripped_line.count("}") + if open_braces == 0: + json_blocks.append(json_buffer) + inside_json = False + json_buffer = "" + + #print(f"Found {len(json_blocks)} JSON blocks.") + + for idx, json_str in enumerate(json_blocks, 1): + # Remove everything after the last closing '}]}' (or '}}'), plus whitespace and percent + # Prefer to anchor on '}]}' for your Report use case + m = re.search(r'(.*\}\]\})', json_str, re.DOTALL) + if m: + json_str = m.group(1) + else: + # Fallback: Remove trailing percent and whitespace + json_str = re.sub(r'[%\s]+$', '', json_str) + outname = f"Telemetry2_report_{idx}.json" + out_path = os.path.join(self.telemetry_path, outname) + with open(out_path, "w", encoding="utf-8") as fout: + fout.write(json_str) + + def get_timestamp(self): + data = self.telemetry_report + + if data.empty: + return False + + col_time = DML.TIME + matching_columns = [col for col in data.columns if col_time.lower() in col.lower()] + + timestamp = pd.to_datetime( + self.telemetry_report[matching_columns[0]], + format="%Y-%m-%d %H:%M:%S", + ) + #print("TimeStamp", timestamp) + return timestamp + + def get_column_name(self, value): + data = self.telemetry_report + + if self.telemetry_report.empty: + print("Telemetry Report Empty!") + return None + else: + matching_columns = [col for col in data.columns if value.lower() in col.lower()] + return matching_columns[0] + + def get_telemetry_col(self,value): + data = self.telemetry_report + + if self.telemetry_report.empty: + print("Telemetry Report Empty!") + return None + else: + matching_columns = [col for col in data.columns if value.lower() in col.lower()] + if len(matching_columns): + return data[matching_columns[0]] + else: + print("Column not Found!", value) + return None + + def get_telemetry_value(self, value, index=0): + data = self.telemetry_report + + if self.telemetry_report.empty: + print("Telemetry Report Empty!") + return None + else: + 
matching_columns = [col for col in data.columns if value.lower() in col.lower()] + #print(matching_columns) + if len(matching_columns): + time_col = self.get_column_name(DML.TIME) + latest = data.sort_values(time_col).iloc[-1] + return latest[matching_columns[index]] + else: + print("Column not Found!", value) + return None + + def start_processing(self): + telemetry_report = pd.DataFrame() + + DATA_LIST = [] + # ---------- Load & prep once at start (or via Upload component) ---------- + for fname in glob.glob(TELEMETRY_PROFILES + "/*.json"): + RAW = json_helper.load_json(fname) + if RAW is not None: + data = json_helper.json_to_df(RAW) + DATA_LIST.append(data) + + telemetry_report = pd.concat(DATA_LIST) + excel_path = os.path.join(TELEMETRY_PROFILES, "Telemetry2_report.xlsx") + try: + with pd.ExcelWriter(excel_path) as writer: + telemetry_report.to_excel(writer) + except Exception as e: + print("Excepton occured ", e) + + #print(telemetry_report.columns) + self.telemetry_report = telemetry_report + + def telemetry_report(self): + return self.telemetry_report \ No newline at end of file diff --git a/logai/utils/constants.py b/logai/utils/constants.py index a4ecf40..225db0d 100644 --- a/logai/utils/constants.py +++ b/logai/utils/constants.py @@ -7,6 +7,10 @@ # from enum import Enum +UPLOAD_DIRECTORY = "./app_uploaded_files" +MERGED_LOGS_DIRECTORY = "./app_uploaded_files/merged_logs" +TELEMETRY_PROFILES = "./app_uploaded_files/telemetry" + DIGITS_SUB = "[DIGITS]" TIMESTAMP = "[TIMESTAMP]" diff --git a/logai/utils/json_helper.py b/logai/utils/json_helper.py new file mode 100644 index 0000000..9440e5c --- /dev/null +++ b/logai/utils/json_helper.py @@ -0,0 +1,51 @@ +import json, pathlib, pandas as pd +from typing import Union, Dict, Any +import pandas as pd +import re + +def clean_json_string(raw_str): + return re.sub(r'[\x00-\x1f\x7f]', '', raw_str) + +def load_json(path: Union[str, pathlib.Path]) -> Any: + """Read file, verify valid JSON, raise with context if broken.""" + try: + with open(path, "r", encoding="utf-8") as f: + raw = f.read() + cleaned = clean_json_string(raw) + data = json.loads(cleaned) + return data + except json.JSONDecodeError as e: + return None + #raise ValueError(f"Bad JSON in {path} → {e}") from None + +def _flatten(obj: Any, parent_key: str = "", sep: str = ".") -> Dict[str, Any]: + """Recursively flattens nested dicts/lists to a single-level dict.""" + items = {} + if isinstance(obj, dict): + for k, v in obj.items(): + items.update(_flatten(v, f"{parent_key}{k}{sep}" if parent_key else k, sep)) + elif isinstance(obj, list): + for _, v in enumerate(obj): + items.update(_flatten(v, f"{parent_key}{sep}" if parent_key else sep)) + else: + items[parent_key.rstrip(sep)] = obj + return items + +def json_to_df(raw: Any) -> pd.DataFrame: + """Normalise single-object or list-of-objects JSON to tidy DataFrame.""" + if isinstance(raw, list): + rows = [_flatten(r) for r in raw] + elif isinstance(raw, dict): + # treat top-level dict as one row unless it obviously contains rows + rows = ([_flatten(raw)] + if not all(isinstance(v, (list, dict)) for v in raw.values()) + else [_flatten(raw)]) + else: + raise TypeError("Unsupported JSON structure") + + df = pd.DataFrame(rows) + # OPTIONAL CLEAN-UP --------- + df.replace({"": None, "null": None}, inplace=True) + df.dropna(axis=1, how="all", inplace=True) # drop empty cols + df = df.convert_dtypes() # best-guess dtypes + return df diff --git a/logai/utils/singleton.py b/logai/utils/singleton.py new file mode 100644 index 0000000..e69de29 
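
With the new telemetry parser, JSON helper, and upload-directory constants in place, the intended flow is: carve the Telemetry 2.0 report blocks out of the merged telemetry2_0 log, flatten each report JSON into a row, and query columns by their DML marker suffix. The following usage sketch is illustrative only (it is not part of the committed code) and assumes a merged telemetry2_0 log has already been placed under ./app_uploaded_files/merged_logs:

    from logai.preprocess.telemetry_parser import Telemetry2Parser, DML

    parser = Telemetry2Parser()
    parser.extract_telemetry_reports()  # split JSON report blocks out of the telemetry2_0 log
    parser.start_processing()           # flatten every extracted report into one DataFrame

    # The combined report is exposed as a DataFrame attribute; DML members are
    # matched case-insensitively against column-name suffixes.
    print(parser.telemetry_report.shape)
    print(parser.get_telemetry_value(DML.CPU_TEMP))  # value from the most recent report

Note that start_processing() expects at least one extracted report JSON to exist under ./app_uploaded_files/telemetry; with an empty directory the pd.concat call will fail.
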
diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3b13f64 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,108 @@ +aiohappyeyeballs==2.6.1 +aiohttp==3.12.14 +aiosignal==1.4.0 +alembic==1.16.4 +async-timeout==5.0.1 +attrs==25.3.0 +blinker==1.9.0 +cachetools==4.2.1 +category-encoders==2.6.4 +certifi==2025.7.14 +charset-normalizer==3.4.2 +click==8.1.8 +cloudpickle==3.1.1 +cmdstanpy==1.2.5 +colorlog==6.9.0 +contourpy==1.3.0 +cycler==0.12.1 +Cython==3.1.2 +dash==3.1.1 +dash-bootstrap-components==2.0.3 +dash-bootstrap-templates==2.1.0 +dash_ag_grid==32.3.0 +datasets==4.0.0 +dill==0.3.8 +dotenv==0.9.9 +drain3==0.9.11 +et_xmlfile==2.0.0 +filelock==3.18.0 +Flask==3.1.1 +fonttools==4.59.0 +frozenlist==1.7.0 +fsspec==2025.3.0 +gensim==4.3.3 +gitdb==4.0.12 +GitPython==3.1.45 +hf-xet==1.1.5 +holidays==0.77 +huggingface-hub==0.33.5 +idna==3.10 +importlib_metadata==8.7.0 +importlib_resources==6.5.2 +itsdangerous==2.2.0 +Jinja2==3.1.6 +joblib==1.5.1 +jsonpickle==1.5.1 +kiwisolver==1.4.7 +lightgbm==4.6.0 +llvmlite==0.43.0 +Mako==1.3.10 +MarkupSafe==3.0.2 +matplotlib==3.9.4 +mpmath==1.3.0 +multidict==6.6.3 +multiprocess==0.70.16 +narwhals==1.48.1 +nest-asyncio==1.6.0 +networkx==3.2.1 +nltk==3.9.1 +numba==0.60.0 +numpy==1.26.4 +openpyxl==3.1.5 +optuna==4.4.0 +packaging==25.0 +pandas==2.3.1 +patsy==1.0.1 +pillow==11.3.0 +plotly==6.2.0 +propcache==0.3.2 +prophet==1.1.7 +py4j==0.10.9.9 +pyarrow==21.0.0 +pyparsing==3.2.3 +python-dateutil==2.9.0.post0 +python-dotenv==1.1.1 +pytz==2025.2 +PyYAML==6.0.2 +regex==2024.11.6 +requests==2.32.4 +retrying==1.4.1 +safetensors==0.5.3 +salesforce-merlion==2.0.4 +scikit-learn==1.6.1 +scipy==1.13.1 +seaborn==0.13.2 +shap==0.48.0 +six==1.17.0 +sklearn==0.0 +slicer==0.0.8 +smart_open==7.3.0.post1 +smmap==5.0.2 +SQLAlchemy==2.0.41 +stanio==0.5.1 +statsmodels==0.14.5 +sympy==1.14.0 +threadpoolctl==3.6.0 +tokenizers==0.21.2 +tomli==2.2.1 +torch==2.7.1 +tqdm==4.67.1 +transformers==4.53.3 +typing_extensions==4.14.1 +tzdata==2025.2 +urllib3==2.5.0 +Werkzeug==3.1.3 +wrapt==1.17.2 +xxhash==3.5.0 +yarl==1.20.1 +zipp==3.23.0 diff --git a/setup.py b/setup.py index 78156f5..701e2eb 100644 --- a/setup.py +++ b/setup.py @@ -1,13 +1,4 @@ # -*- coding: utf-8 -*- -# -# Copyright (c) 2023 Salesforce.com, inc. -# All rights reserved. 
-# SPDX-License-Identifier: BSD-3-Clause -# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause -# -# - - # Learn more: https://github.com/kennethreitz/setup.py from setuptools import setup, find_packages @@ -21,51 +12,45 @@ extras_require = { "gui": [ - "dash-bootstrap-components>=1.2.1", - "plotly>=5.9.0", - "dash>=2.5.1", + "dash-bootstrap-components>=2.0.3", + "plotly>=6.2.0", + "dash>=3.1.1", ], "deep-learning": [ - "tokenizers>=0.11.6", - "datasets>=1.18.3", - "torch>=1.10.1", - "transformers>=4.17.0,<=4.23", + "tokenizers>=0.21.2", + "datasets>=4.0.0", + "torch>=2.7.1", + "transformers>=4.53.3", ], "dev": [ - "Sphinx>=3.5.3", - "docutils>=0.18.1", - "wheel>=0.37.1", - "toml>=0.10.2", - "build>=0.7.0", - "jupyter>=1.0.0", - "ipykernel>=6.16", - "pytest>=6.2.5", + ] } extras_require["all"] = sum(extras_require.values(), []) setup( - name="logai", - version="0.1.5", - description="LogAI is unified framework for AI-based log analytics", + name="rdk_one3b", + version="0.0.1", + description="AI-Powered Fault Detection, Prediction and Auto-Healing for Home connectivity", long_description_content_type="text/markdown", long_description=readme, - author="Qian Cheng, Amrita Saha, Wenzhuo Yang, Chenghao Liu, Gerald Woo, Doyen Sahoo, Steven Hoi", - author_email="logai@salesforce.com", - python_requires=">=3.7.0,<4", + author="Arumugam Panchatcharam, Sivasubramanian, Vasanthakumar, Vandana, Divya Kamatagi, Siddharth Nair, Aditya", + author_email="telekom-digital.com", + python_requires=">=3.9.6", install_requires=[ + "scikit-learn>=1.6.1", + "pandas>=2.3.1", + "numpy>=2.0.2", + "PyYAML>=6.0.2", + "attrs>=25.3.0", + "schema>=0.7.5", "salesforce-merlion>=1.0.0", "Cython>=0.29.30", "nltk>=3.6.5", "gensim>=4.1.2", - "scikit-learn>=1.0.1", - "pandas>=1.2.0", - "numpy>=1.21.4", "spacy>=3.2.2", - "attrs>=21.2.0", "dataclasses>=0.6", - "PyYAML>=6.0", "tqdm>=4.62.3", "cachetools>=4.2.4", "matplotlib>=3.5.1", @@ -75,6 +60,6 @@ ], extras_require=extras_require, license=license, - packages=find_packages(exclude=["tests", "tests.*", "docs", "gui", "gui.*"]), + packages=find_packages(exclude=["tests", "tests.*", "docs", "gui", "gui.*"]), include_package_data=True, )
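
A note on the LogRecordObject change earlier in this diff: once the decorator comes from the standard library's dataclasses module instead of attr, a mutable default such as pd.DataFrame() is either rejected outright or silently shared across instances, depending on the Python version, which is why every field was rewritten with field(default_factory=...). A minimal sketch of the pattern (class and field names are invented for illustration):

    from dataclasses import dataclass, field
    import pandas as pd

    @dataclass
    class RecordSketch:
        # A bare "df: pd.DataFrame = pd.DataFrame()" default is either refused by
        # the dataclass machinery or reused by every instance; default_factory
        # builds a fresh, independent DataFrame for each new instance instead.
        df: pd.DataFrame = field(default_factory=pd.DataFrame)

    r1, r2 = RecordSketch(), RecordSketch()
    assert r1.df is not r2.df  # each instance gets its own empty DataFrame
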