@article{huang2023look,
  author    = {Huang, Yuheng and Song, Jiayang and Wang, Zhijie and Zhao, Shengming and Chen, Huaming and Juefei-Xu, Felix and Ma, Lei},
  title     = {Look before you leap: An exploratory study of uncertainty measurement for large language models},
  journal   = {IEEE Transactions on Software Engineering},
  year      = {2024},
  publisher = {IEEE},
}
LLM-Robotics
ISR-LLM: Iterative self-refined large language model for long-horizon sequential task planning
Zhehua Zhou, Jiayang Song, Kunpeng Yao, and 2 more authors
In 2024 IEEE International Conference on Robotics and Automation (ICRA), 2024
@inproceedings{zhou2024isr,
  author       = {Zhou, Zhehua and Song, Jiayang and Yao, Kunpeng and Shu, Zhan and Ma, Lei},
  title        = {{ISR-LLM}: Iterative self-refined large language model for long-horizon sequential task planning},
  booktitle    = {2024 {IEEE} International Conference on Robotics and Automation ({ICRA})},
  pages        = {2081--2088},
  year         = {2024},
  organization = {IEEE},
}
AI-CPS
Towards building AI-CPS with NVIDIA Isaac Sim: An industrial benchmark and case study for robotics manipulation
Zhehua Zhou, Jiayang Song, Xuan Xie, and 5 more authors
In Proceedings of the 46th International Conference on Software Engineering: Software Engineering in Practice, 2024
@inproceedings{zhou2024towards,
  author    = {Zhou, Zhehua and Song, Jiayang and Xie, Xuan and Shu, Zhan and Ma, Lei and Liu, Dikai and Yin, Jianxiong and See, Simon},
  title     = {Towards building {AI-CPS} with {NVIDIA} {Isaac} {Sim}: An industrial benchmark and case study for robotics manipulation},
  booktitle = {Proceedings of the 46th International Conference on Software Engineering: Software Engineering in Practice},
  pages     = {263--274},
  year      = {2024},
}
LLM
Online Safety Analysis for LLMs: a Benchmark, an Assessment, and a Path Forward
Xuan Xie, Jiayang Song, Zhehua Zhou, and 3 more authors
@unpublished{xie2024online,
  author = {Xie, Xuan and Song, Jiayang and Zhou, Zhehua and Huang, Yuheng and Song, Da and Ma, Lei},
  title  = {Online Safety Analysis for {LLMs}: a Benchmark, an Assessment, and a Path Forward},
  note   = {Under Review},
  year   = {2024},
}
VLA
Towards testing and evaluating vision-language-action models for robotic manipulation: An empirical study
Zhijie Wang, Zhehua Zhou, Jiayang Song, and 3 more authors
@unpublished{wang2024towards,
  author = {Wang, Zhijie and Zhou, Zhehua and Song, Jiayang and Huang, Yuheng and Shu, Zhan and Ma, Lei},
  title  = {Towards testing and evaluating vision-language-action models for robotic manipulation: An empirical study},
  note   = {Under Review},
  year   = {2024},
}
LLM
LUNA: A Model-Based Universal Analysis Framework for Large Language Models
Da Song, Xuan Xie, Jiayang Song, and 4 more authors
@article{song2024luna,
  author    = {Song, Da and Xie, Xuan and Song, Jiayang and Zhu, Derui and Huang, Yuheng and Juefei-Xu, Felix and Ma, Lei},
  title     = {{LUNA}: A Model-Based Universal Analysis Framework for Large Language Models},
  journal   = {IEEE Transactions on Software Engineering},
  year      = {2024},
  publisher = {IEEE},
}
AI-CPS
MORTAR: A Model-based Runtime Action Repair Framework for AI-enabled Cyber-Physical Systems
Renzhi Wang, Zhehua Zhou, Jiayang Song, and 3 more authors
@unpublished{wang2024mortar,
  author = {Wang, Renzhi and Zhou, Zhehua and Song, Jiayang and Xie, Xuan and Xie, Xiaofei and Ma, Lei},
  title  = {{MORTAR}: A Model-based Runtime Action Repair Framework for {AI}-enabled Cyber-Physical Systems},
  note   = {Under Review},
  year   = {2024},
}
LLM
Active Testing of Large Language Model via Multi-Stage Sampling
Yuheng Huang, Jiayang Song, Qiang Hu, and 2 more authors
@unpublished{huang2024active,
  author = {Huang, Yuheng and Song, Jiayang and Hu, Qiang and Juefei-Xu, Felix and Ma, Lei},
  title  = {Active Testing of Large Language Model via Multi-Stage Sampling},
  note   = {Under Review},
  year   = {2024},
}
LLM
Multilingual blending: LLM safety alignment evaluation with language mixture
Jiayang Song, Yuheng Huang, Zhehua Zhou, and 1 more author
@unpublished{song2024multilingual,
  author = {Song, Jiayang and Huang, Yuheng and Zhou, Zhehua and Ma, Lei},
  title  = {Multilingual blending: {LLM} safety alignment evaluation with language mixture},
  note   = {Under Review},
  year   = {2024},
}
LLM
Towards Understanding Retrieval Accuracy and Prompt Quality in RAG Systems
Shengming Zhao, Yuheng Huang, Jiayang Song, and 3 more authors
@unpublished{zhao2024towards,
  author = {Zhao, Shengming and Huang, Yuheng and Song, Jiayang and Wang, Zhijie and Wan, Chengcheng and Ma, Lei},
  title  = {Towards Understanding Retrieval Accuracy and Prompt Quality in {RAG} Systems},
  note   = {Under Review},
  year   = {2024},
}
Robotics-RL
GenSafe: A Generalizable Safety Enhancer for Safe Reinforcement Learning Algorithms Based on Reduced Order Markov Decision Process Model
Zhehua Zhou, Xuan Xie, Jiayang Song, and 2 more authors
IEEE Transactions on Neural Networks and Learning Systems, 2024
% Numeric citation key is kept unchanged so existing citations of this entry keep resolving.
@article{10766903,
  author    = {Zhou, Zhehua and Xie, Xuan and Song, Jiayang and Shu, Zhan and Ma, Lei},
  title     = {{GenSafe}: A Generalizable Safety Enhancer for Safe Reinforcement Learning Algorithms Based on Reduced Order {Markov} Decision Process Model},
  journal   = {IEEE Transactions on Neural Networks and Learning Systems},
  year      = {2024},
  pages     = {1--15},
  publisher = {IEEE},
}
LLM
LADEV: A Language-Driven Testing and Evaluation Platform for Vision-Language-Action Models in Robotic Manipulation
Zhijie Wang, Zhehua Zhou, Jiayang Song, and 3 more authors
@unpublished{wang2024ladev,
  author = {Wang, Zhijie and Zhou, Zhehua and Song, Jiayang and Huang, Yuheng and Shu, Zhan and Ma, Lei},
  title  = {{LADEV}: A Language-Driven Testing and Evaluation Platform for Vision-Language-Action Models in Robotic Manipulation},
  note   = {Under Review},
  year   = {2024},
}
LLM
LeCov: Multi-level Testing Criteria for Large Language Models
Xuan Xie, Jiayang Song, Yuheng Huang, and 4 more authors
@unpublished{xie2024lecov,
  author = {Xie, Xuan and Song, Jiayang and Huang, Yuheng and Song, Da and Zhang, Fuyuan and Juefei-Xu, Felix and Ma, Lei},
  title  = {{LeCov}: Multi-level Testing Criteria for Large Language Models},
  note   = {Under Review},
  year   = {2024},
}
2023
LLM-Robotics
Self-refined large language model as automated reward function designer for deep reinforcement learning in robotics
Jiayang Song, Zhehua Zhou, Jiawei Liu, and 3 more authors
@unpublished{song2023self,
  author = {Song, Jiayang and Zhou, Zhehua and Liu, Jiawei and Fang, Chunrong and Shu, Zhan and Ma, Lei},
  title  = {Self-refined large language model as automated reward function designer for deep reinforcement learning in robotics},
  note   = {Under Review},
  year   = {2023},
}
AI-CPS
SIEGE: A Semantics-Guided Safety Enhancement Framework for AI-Enabled Cyber-Physical Systems
% Key ends in "mathtt", presumably an artifact of the exporting tool (TODO confirm); kept as-is so existing citations resolve.
@article{song2023mathtt,
  author    = {Song, Jiayang and Xie, Xuan and Ma, Lei},
  title     = {{SIEGE}: A Semantics-Guided Safety Enhancement Framework for {AI}-Enabled Cyber-Physical Systems},
  journal   = {IEEE Transactions on Software Engineering},
  volume    = {49},
  number    = {8},
  pages     = {4058--4080},
  year      = {2023},
  publisher = {IEEE},
}
AI-CPS
Mosaic: Model-based Safety Analysis Framework for AI-enabled Cyber-Physical Systems
Xuan Xie, Jiayang Song, Zhehua Zhou, and 2 more authors
@unpublished{xie2023mosaic,
  author = {Xie, Xuan and Song, Jiayang and Zhou, Zhehua and Zhang, Fuyuan and Ma, Lei},
  title  = {Mosaic: Model-based Safety Analysis Framework for {AI}-enabled Cyber-Physical Systems},
  note   = {Under Review},
  year   = {2023},
}
AI-CPS
Autorepair: Automated repair for AI-enabled cyber-physical systems under safety-critical conditions
Deyun Lyu, Jiayang Song, Zhenya Zhang, and 4 more authors
@unpublished{lyu2023autorepair,
  author = {Lyu, Deyun and Song, Jiayang and Zhang, Zhenya and Wang, Zhijie and Zhang, Tianyi and Ma, Lei and Zhao, Jianjun},
  title  = {Autorepair: Automated repair for {AI}-enabled cyber-physical systems under safety-critical conditions},
  note   = {Under Review},
  year   = {2023},
}
2022
AI-CPS
When cyber-physical systems meet AI: A benchmark, an evaluation, and a way forward
Jiayang Song, Deyun Lyu, Zhenya Zhang, and 3 more authors
In Proceedings of the 44th International Conference on Software Engineering: Software Engineering in Practice, 2022
@inproceedings{song2022cyber,
  author    = {Song, Jiayang and Lyu, Deyun and Zhang, Zhenya and Wang, Zhijie and Zhang, Tianyi and Ma, Lei},
  title     = {When cyber-physical systems meet {AI}: A benchmark, an evaluation, and a way forward},
  booktitle = {Proceedings of the 44th International Conference on Software Engineering: Software Engineering in Practice},
  pages     = {343--352},
  year      = {2022},
}