% Conference paper entry; the url field points to the ACL Anthology record 2023.findings-ccl-1.11.
@inproceedings{Huang-etal-2023-a,
    author    = {Huang, Chunxiao and Li, Chunyu and Yao, Shaowen and Bao, Mingsuo},
    title     = {A Novel Method for Semantic Analysis of {Cantonese} Text Based on {Can\_Man} Dictionary},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.11},
    pages     = {111--117},
    abstract  = {A novel scheme to address the lack of a corpus for semantic analysis of Cantonese sentences is presented. Scheme need prepare a POS Dictionary, some additional CHAT format files and a Can\_Man Dictionary. For those words or phrases with unique part-of-speech (commonly known as POS), we established a dictionary which we call POS Dictionary and saved as a JSON file. We export a Cantonese phase file from the example sentences of the Great Dictionary of Hong Kong Cantonese and other corpus sources, annotate Jyutping Romanization and annotate them with POS tags, finally generate the additional CHAT format file. Furthermore, we make 60 CHAT format files from HKCC corpus. And we established a dictionary according to the Hong Kong Cantonese Dictionary and saved as a JSON file, which we call Can\_Man Dictionary. When parsing Cantonese text, we first execute word segmentation with PyCantonese, then do other tasks including POS tagging with the POS Dictionary and PyCantonese's pos\_tagging module, converting the Cantonese text's words into Mandarin words with the Can\_Man Dictionary to reconstruct a Mandarin words List and do some semantic parsing task such as Cantonese Abstract Meaning Representation (CanAMR) and Semantic Dependency Parsing (SDP) on the Mandarin word List with Hanlp, visual display all the above parsing results, etc. Test results show the superior performance of the scheme and potential for the parsing Cantonese text.},
    language  = {English},
}