2024 NeurIPS 2024 MMLU-Pro: A more robust and challenging multi-task language understanding benchmark Yubo Wang , Xueguang Ma , Ge Zhang , and 14 more authors 2024 arXiv Bib
@misc{wang2024mmlu,
  author            = {Wang, Yubo and Ma, Xueguang and Zhang, Ge and Ni, Yuansheng and Chandra, Abhranil and Guo, Shiguang and Ren, Weiming and Arulraj, Aaran and He, Xuan and Jiang, Ziyan and Li, Tianle and Ku, Max and Wang, Kai and Zhuang, Alex and Fan, Rongqi and Yue, Xiang and Chen, Wenhu},
  title             = {{MMLU-Pro}: A more robust and challenging multi-task language understanding benchmark},
  year              = {2024},
  eprint            = {2406.01574},
  archiveprefix     = {arXiv},
  google_scholar_id = {WF5omc3nYNoC},
}
NeurIPS 2024 GenAI Arena: An Open Evaluation Platform for Generative Models Dongfu Jiang , Max Ku , Tianle Li , and 4 more authors 2024 arXiv Bib Website
@misc{li2024genai,
  author            = {Jiang, Dongfu and Ku, Max and Li, Tianle and Ni, Yuansheng and Sun, Shizhuo and Fan, Rongqi and Chen, Wenhu},
  title             = {{GenAI} {Arena}: An Open Evaluation Platform for Generative Models},
  year              = {2024},
  eprint            = {2406.04485},
  archiveprefix     = {arXiv},
  google_scholar_id = {IjCSPb-OGe4C},
}
EMNLP 2024 VideoScore: Building Automatic Metrics to Simulate Fine-grained Human Feedback for Video Generation Xuan He , Dongfu Jiang , Ge Zhang , Max Ku , and 15 more authors 2024 arXiv Bib
@misc{li2024genaj,
  author            = {He, Xuan and Jiang, Dongfu and Zhang, Ge and Ku, Max and Soni, Achint and Siu, Sherman and Chen, Haonan and Chandra, Abhranil and Jiang, Ziyan and Arulraj, Aaran and Wang, Kai and Do, Quy Duc and Ni, Yuansheng and Lyu, Bohan and Narsupalli, Yaswanth and Fan, Rongqi and Lyu, Zhiheng and Lin, Yuchen and Chen, Wenhu},
  title             = {{VideoScore}: Building Automatic Metrics to Simulate Fine-grained Human Feedback for Video Generation},
  year              = {2024},
  eprint            = {2406.15252},
  archiveprefix     = {arXiv},
  google_scholar_id = {MXK_kJrjxJIC},
}
2023 ICCV 2023 Workshop DeepfakeArt Challenge: A Benchmark Dataset for Generative AI Art Forgery and Data 
Poisoning Detection Hossein Aboutalebi , Dayou Mao , Rongqi Fan , and 3 more authors 2023 arXiv Bib
@misc{li2024genak,
  author            = {Aboutalebi, Hossein and Mao, Dayou and Fan, Rongqi and Xu, Carol and He, Chris and Wong, Alexander},
  title             = {{DeepfakeArt} {Challenge}: A Benchmark Dataset for Generative {AI} Art Forgery and Data Poisoning Detection},
  year              = {2023},
  eprint            = {2306.01272},
  archiveprefix     = {arXiv},
  google_scholar_id = {zYLM7Y9cAGgC},
}