% Volume-level record for the CCL 2023 Findings proceedings.
% `editor` holds the romanized names in "Family, Given" form; the original-script
% names live in the non-standard `editor-zh` field, which standard styles ignore.
% NOTE(review): the URL below uses a different host and volume id than the
% per-paper URLs (aclanthology.org/2023.findings-ccl-1.N) -- confirm which is canonical.
@Proceedings{findings-CCL2023:2023,
  editor    = {Sun, Maosong and Qin, Bing and Qiu, Xipeng and Jiang, Jing and Han, Xianpei and Rao, Gaoqi and Chen, Yubo},
  editor-zh = {孙, 茂松 and 秦, 兵 and 邱, 锡鹏 and 蒋, 静 and 韩, 先培 and 饶, 高琦 and 陈, 玉博},
  title     = {Proceedings of the 22nd {Chinese} National Conference on Computational Linguistics},
  month     = aug,
  year      = {2023},
  address   = {Harbin, China},
  publisher = {Chinese Information Processing Society of China},
  url       = {https://www.aclweb.org/anthology/2023.ccl2023-findings},
}

% `author` = romanized names in "Family, Given" form; `author-zh` = original-script
% names (non-standard field, ignored by standard styles).
% NOTE(review): the third author's romanization is kept as supplied -- confirm against the paper.
@inproceedings{Zhang-etal-2023-ji,
    author    = {Zhang, Tengxun and Xu, Hongfei and Van Ginabis, Joseph and Xiong, Deyi and Zan, Hongying},
    author-zh = {张, 腾勋 and 许, 鸿飞 and 约瑟夫·范·吉纳比斯 and 熊, 德意 and 昝, 红英},
    title     = {基于软掩码增强数值表示的表格-文本混合问答(Hybrid Tabular-Textual Question Answering Based on SoftMasking Enhanced Numerical Representation)},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.1},
    pages     = {1--12},
    abstract  = {表格-文本混合问答需要从异质数据中进行数值推理得到答案，当前的研究将问题、表格、文本拼接作为输入，直接使用LSTM作为解码器生成操作数，但输入中包含太多的不相关数值和文本信息，如何从中抽取数值推理需要的数值是此任务面临的重要挑战之一。本文提出一种新的软掩码方法增强数值抽取的性能，通过软掩码不相关的数值和文本，然后再送入解码器中生成操作数。本文在ConvFinQA和MultiHiertt数据集上进行实验，结果表明通过软掩码增强数值表示可显著提升模型的性能（+3.09/+2.96 Exe/Prog Acc和+5.29/+3.45 EM/F1）。本文还通过消融实验分析软掩码相对于硬掩码的优势，并讨论了当前表格-文本混合问答的局限性与未来方向。},
    language  = {Chinese},
}

% `author` = romanized names in "Family, Given" form; `author-zh` = original-script
% names (non-standard field, ignored by standard styles).
@inproceedings{Hu-etal-2023-ji,
    author    = {Hu, Junyuan and Zhou, Xiaolu and Tan, Long},
    author-zh = {胡, 峻源 and 周, 小璐 and 谭, 龙},
    title     = {基于缓解错误传递策略的对话状态跟踪(Dialogue State Tracking Based on Error Propagation Mitigation Strategy)},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.2},
    pages     = {13--23},
    abstract  = {对话状态跟踪模块是任务型对话系统的核心组件。现有的一些对话状态跟踪方法基于上一轮的对话状态生成轮级状态，存在错误传递的问题，会对后续预测产生影响。因此本文提出了个基于缓解错误传递策略的对话状态跟踪模型，该模型使用对话级状态作为预测目标，在模型训练时以一定的概率随机删除前一轮次的对话状态，迫使模型在不完全可信的对话状态信息中学会纠正错误。本文在嘈杂(MultiWOZ 2.1）和洁净(MultiWOZ 2.4）数据集的实验表明，该模型相比较于基线模型有更好的错误修正能力，模型的联合准确率(MultiWOZ 2.4）达到了70.95\%的良好性能表现。},
    language  = {Chinese},
}

% `author` = romanized names; `author-zh` = original-script names (non-standard
% field, ignored by standard styles). Single-word names are left as given.
@inproceedings{Wuyunga-etal-2023-meng,
    author    = {Wuyunga and Bao, Yanhua and Dahubaiyila},
    author-zh = {乌云嘎 and 包, 艳花 and 达胡白乙拉},
    title     = {蒙古语形容词短语语义角色特征的统计考察(A Statistical Study on the Semantic Role Characteristics of {Mongolian} Adjective Phrases)},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.3},
    pages     = {24--33},
    abstract  = {蒙古语中形容词短语数量多且用途广，在句子中可以充当多种语义角色。统计考察蒙古语形容词短语语义角色的特征对语法、语义的研究和应用具有重要的理论意义和实用价值。本文基于《蒙古语形容词短语语义角色信息数据库》，从形容词短语语义角色类别及频率、长度、长度与类别、内部结构关系类型及其频率、内部结构关系与类别之间的关系、内部结构关系类型与长度之间的关系、与形容词短语语义角色相关联的句子成分类型及频率、与形容词短语语义角色类别关联的句子成分类别及频率等8个方面进行了统计分析。经研究发现，该数据库含有形容词短语语义角色640条，出现的类别有4个大类14个小类，出现最多的大类是饰体，出现最多的小类是饰体大类中的性状语义角色；形容词短语语义角色的长度分布在2–12个词之间，其中长度为2个词的出现频率最高；长度为2个词的形容词短语语义角色覆盖的语义角色类别和出现次数最多；形容词短语语义角色覆盖的内部结构关系类别有7种，其中定体关系出现频率最高，且所对应语义角色类别最多；形容词短语语义角色与4种句子成分相对应，其中与定语对应出现的最多；与形容词短语语义角色类别关联的句子成分类别及频率统计中，每个语义角色对应一种句子成分。},
    language  = {Chinese},
}

% `author` = romanized names in "Family, Given" form; `author-zh` = original-script
% names (non-standard field, ignored by standard styles).
% NOTE(review): "Reili" and "Fan" do not match the pinyin of the 5th and 6th
% original-script names (expected "Ruili" / "Pan") -- spelling kept as supplied; confirm against the paper.
% The Latin terms in the abstract (BERT, token, FewFC) were restored from glyphs
% that had been shifted into the CJK range during text extraction.
@inproceedings{Yan-etal-2023-ji,
    author    = {Yan, Jingtao and Li, Yang and Wang, Suge and Liao, Jian and Pu, Reili and Fan, Bangze and Li, Deyu},
    author-zh = {闫, 婧涛 and 李, 旸 and 王, 素格 and 廖, 健 and 普, 瑞丽 and 潘, 邦泽 and 李, 德玉},
    title     = {基于联合学习的语言粒度融合的重叠事件抽取方法(Overlapping Event Extraction Method of Language Granularity Fusion Based on Joint Learning)},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.4},
    pages     = {34--45},
    abstract  = {事件抽取是一项重要的信息抽取任务，旨在从非结构化文本中抽取结构化的事件信息。现有的事件抽取方法大多假设一个句子中仅出现一个事件，未考虑重叠事件，然而，在真实的场景下，重叠事件是不可避免的。因此，该文提出了一种基于联合学习的语言粒度融合的重叠事件抽取方法，该方法利用预训练语言模型BERT对句子进行初始表示，再以token数目逐层递增和逐层递减的方式，分别构建包含不同token数目的片段表示，旨在将短文本包含的离散信息传递给长文本，再将长文本包含的抽象化语义信息传递给短文本，建立基于语言粒度融合的句子表示。进一步，使用门控机制，获得融合不同粒度特征和目标事件感知的句子表示。最后，在预测阶段，通过计算句子中词间距离感知得分，并行的预测词间的片段和角色关系，再利用关系标签解码，基于联合学习获得了事件触发词、论元及事件类型和论元角色。在公开的事件抽取数据集FewFC上进行实验，结果表明该文提出的方法对重叠事件的抽取是有效的。},
    language  = {Chinese},
}

% `author` = romanized names in "Family, Given" form; `author-zh` = original-script
% names (non-standard field, ignored by standard styles).
@inproceedings{Li-etal-2023-ji,
    author    = {Li, Shuqi and Wang, Suge and Wang, Dian and Li, Ailin and Lei, Yang},
    author-zh = {李, 书琪 and 王, 素格 and 王, 典 and 李, 爱琳 and 雷, 洋},
    title     = {基于知识融合的散文物象识别方法研究(Research on the Recognition Method of Prose Image Based on Knowledge Fusion)},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.5},
    pages     = {46--56},
    abstract  = {在散文中，作者通常采用物象寄托自己的所思所想，以达到情物交融、托物言志，为此，本文提出一种融合多种知识的散文物象识别方法。该方法利用情感词表、隐喻特征库以及现代汉语词典等外部知识库，使用Cross-Transformer中的交叉多头注意力机制，将外部知识融入散文句子的表示中；进一步通过条件层归一化，将融合情感词的句子表示、融合隐喻特征库和现代汉语词典的句子表示分别变换为二维网格表示，并利用相对位置信息建立位置的二维网格表示，在此基础上，将三者的二维网格表示拼接，利用膨胀卷积对其进行编码；再分别使用双仿射分类器和多层感知机得到最终的字间关系分类结果。实验结果表明，本文提出的方法能够有效识别散文物象，可为散文阅读理解的思想情感类的问题解答提供一定技术支撑。},
    language  = {Chinese},
}

% `author` = romanized names in "Family, Given" form; `author-zh` = original-script
% names (non-standard field, ignored by standard styles).
% The quotation marks and the year 2000 in the abstract were restored from glyphs
% that had been shifted into the CJK range during text extraction.
@inproceedings{Zhu-etal-2023-ji,
    author    = {Zhu, Yanfei and Guo, Mengqing and Chen, Yingshi and Yu, Dong and Liu, Pengyuan},
    author-zh = {朱, 彦霏 and 郭, 梦清 and 陈, 颖诗 and 于, 东 and 刘, 鹏远},
    title     = {基于语言计量的道德与性别共时历时研究(A Synchronic and Diachronic Study on Gender and Morality Based on Quantitative Linguistics)},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.6},
    pages     = {57--67},
    abstract  = {不同性别与道德间的变化关系是人文学科领域研究的热点，但目前的研究大多是定性研究，缺乏大规模数据的支撑。本文对包含有道德类别、道德极性和道德强度属性的道德词与性别词进行计量分析，研究了道德与性别在共时和历时层面上的变化关系。结果发现，共时层面上，各语体在职业道德与性别之间的关系上达成了共识，而在家庭美德上的差异最大；百度百科中体现了道德规范对女性的束缚，即女性比男性更需要维系自己的正面道德形象；与此同时，社会道德规范将女性塑造为“天使”或“荡妇”的两级形象。历时层面上，《人民日报》中男女性别和道德的关联随时间呈现一致的变化趋势，且更加关注女性的道德；对不同性别的道德关注点不同，更加关注女性的个人品德和男性的家庭美德；2000年附近女性与道德之间的联系最低，甚至低于男性，主要是受到国家女性政策的影响。},
    language  = {Chinese},
}

% `author` = romanized names; `author-zh` = original-script names (non-standard
% field, ignored by standard styles). Single-word names are left as given.
@inproceedings{Danzhengji-etal-2023-zang,
    author    = {Danzhengji and Huaquecairang and Wanmechuo and Bai, Ying},
    author-zh = {旦正吉 and 华却才让 and 完么措 and 白, 颖},
    title     = {藏语句子语义组块标注数据集的构建方法研究(A Study on the Construction of a {Tibetan} Sentence Semantic Block Annotation Dataset)},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.7},
    pages     = {68--78},
    abstract  = {语义组块对自然语言的语义理解和分析有着重要的作用，其自动标注技术依赖于良好的语义组块标注训练数据集。目前，藏语方面未发现语义组块研究方面的分类体系，考虑到按粗粒度分析语义不利于语义解析和知识抽取等任务，选择了细粒度语义分析方法，依据不同藏文句型中语义组块的结构特征，制定了藏语句子语义组块标注规范 (TSSCTS-13)。在此基础上，构建了一个实用的藏语句子语义组块标注资源库(TSSCTL-44302)。截至目前，共完成了 498619 个语义组块标注，并在该文提出的藏文音节向量和 BILSTM-CRF 相结合模型上完成了自动识别的实验。综合测试实验结果F1 值为 95.28\%，精确率为 94.95\%，召回率为 95.62\%，结果表明该文构建的数据集可以应用于藏语语义领域的语义组块识别任务。},
    language  = {Chinese},
}

% `author` = romanized name in "Family, Given" form; `author-zh` = original-script
% name (non-standard field, ignored by standard styles).
@inproceedings{Yang-etal-2023-han,
    author    = {Yang, Chunlei},
    author-zh = {杨, 春雷},
    title     = {汉语语法工程研究的意义、内容与方法({Chinese} Grammar Engineering: Significance, Content and Methodology)},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.8},
    pages     = {79--89},
    abstract  = {面向深层语言处理的汉语语法工程（Grammar Engineering,GE）不仅可弥补在自然语言处理领域亟需的系统汉语语法理论，还可提供计算实现的平台。首先，通过数据分析和文献梳理，指出构建汉语GE的迫切性和重要性。然后，从语言普遍性视角设计了面向深层语言处理的汉语GE研究内容，尤其是重点研究汉语特殊语法现象的描写。接着，提出跨语言学本体和计算实现两个领域的具体研究方法。最后，介绍多个语言学层面的研究成果，验证了本方法的可行性。},
    language  = {Chinese},
}

% `author` = romanized names in "Family, Given" form; `author-zh` = original-script
% names (non-standard field, ignored by standard styles).
% The page range was supplied as 90--00; the end page is set to 99 because the
% following paper in the volume starts on page 100.
% The dash pair before the book title in the abstract was restored from glyphs
% that had been shifted into the CJK range during text extraction.
@inproceedings{Lu-etal-2023-ji,
    author    = {Lu, Jingya and Chen, Jin and Zhang, Yixuan and Chang, Bolin and Xu, Zhixing and Li, Bin and Wang, Dongbo},
    author-zh = {芦, 靖雅 and 陈, 瑾 and 张, 艺璇 and 常, 博林 and 许, 智星 and 李, 斌 and 王, 东波},
    title     = {基于抽象语义标注的《左传》语料库构建初探(A Preliminary Investigation on construction of {ZuoZhuan} corpus based on Abstract Meaning Representation)},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.9},
    pages     = {90--99},
    abstract  = {抽象语义表示（Abstract Meaning Representation,AMR）是一种基于句子的语义表示方法，其核心是单根有向无环图。因其在语义表示方面的简单便捷的优势进而发展出了中文AMR、韩文AMR、西班牙语AMR等。同时构建了对应的标注库，相应的评测和解析都取得了不错的成绩。古汉语语料库的建设有分词和词性标注语料库，但仍缺少古汉语语义库。为此，本文基于一部优秀的上古汉语文献——《左传》，提出了一种为古汉语服务的抽象语义表示方法，制定抽象语义标注规范，初步建成《左传》1500句抽象语义标注语料库，以期为古汉语的语义表示和语料库建设提供参考。},
    language  = {Chinese},
}

% `author` = romanized names in "Family, Given" form; `author-zh` = original-script
% names (non-standard field, ignored by standard styles).
@inproceedings{Feng-etal-2023-rong,
    author    = {Feng, Yan and Pu, Fei and Yang, Bolin},
    author-zh = {冯, 艳 and 蒲, 飞 and 杨, 柏林},
    title     = {融合实体多层语义的四元数表示学习方法(Quaternion Representation Learning Method to Fusion Multilayer Semantics of Entities)},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.10},
    pages     = {100--110},
    abstract  = {知识图谱作为人工智能领域中一个重要的研究分支，通常采用三元组（头实体，关系，尾实体）的形式描述和存储客观世界中的信息。由于知识图谱普遍存在数据稀疏的问题，即知识图谱不完备问题，因此需要借助机器学习和表示学习来补全知识图谱。针对这个问题，本文提出一种融合实体多层语义的四元数表示学习方法，通过将实体和关系建模在超复数空间内，用四元数和单位四元数分别表示头尾实体和关系，利用关系单位四元数对头实体和尾实体分别做右等斜旋转和逆方向的右等斜旋转来融合实体的多层语义信息，在能够推理和建模多种复杂关系模式（对称/反对称、反转和组合关系模式等）的同时，也能捕获到实体和关系之间隐含的多层交互信息。本文对比几个主流的知识图谱嵌入模型，在四个公开数据集WN18、WN18RR、FB15K和FB15K-237上进行链接预测实验，实验结果显示，本文提出的方法相对于已有的知识图谱嵌入模型的性能有了明显改进和提升。},
    language  = {Chinese},
}

% Author names are in "Family, Given" form.
% NOTE(review): the last author was supplied as "Bao Mingsuo" without a comma; it is
% treated as family name "Bao" to match the other three names -- confirm.
% Underscores are escaped so the title and abstract typeset in LaTeX; words split or
% fused by line-break extraction in the abstract have been rejoined, wording unchanged.
@inproceedings{Huang-etal-2023-a,
    author    = {Huang, Chunxiao and Li, Chunyu and Yao, Shaowen and Bao, Mingsuo},
    title     = {A Novel Method for Semantic Analysis of {Cantonese} Text Based on {Can\_Man} Dictionary},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.11},
    pages     = {111--117},
    abstract  = {A novel scheme to address the lack of a corpus for semantic analysis of Cantonese sentences is presented. Scheme need prepare a POS Dictionary, some additional CHAT format files and a Can\_Man Dictionary. For those words or phrases with unique part-of-speech (commonly known as POS), we established a dictionary which we call POS Dictionary and saved as a JSON file. We export a Cantonese phase file from the example sentences of the Great Dictionary of Hong Kong Cantonese and other corpus sources, annotate Jyutping Romanization and annotate them with POS tags, finally generate the additional CHAT format file. Furthermore, we make 60 CHAT format files from HKCC corpus. And we established a dictionary according to the Hong Kong Cantonese Dictionary and saved as a JSON file, which we call Can\_Man Dictionary. When parsing Cantonese text, we first execute word segmentation with PyCantonese, then do other tasks including POS tagging with the POS Dictionary and PyCantonese's pos\_tagging module, converting the Cantonese text's words into Mandarin words with the Can\_Man Dictionary to reconstruct a Mandarin words List and do some semantic parsing task such as Cantonese Abstract Meaning Representation(CanAMR) and Semantic Dependency Parsing(SDP) on the Mandarin word List with Hanlp, visual display all the above parsing results, etc. Test results show the superior performance of the scheme and potential for the parsing Cantonese text.},
    language  = {English},
}

% Author names are in "Family, Given" form (the source had them reversed).
% The citation key still starts with the first author's given name; it is left
% unchanged so existing citations keep resolving.
% Words fused by line-break extraction in the abstract have been separated and the
% detached accent in "Daumé" repaired; wording is otherwise unchanged.
@inproceedings{Yitao-etal-2023-grammatical,
    author    = {Liu, Yitao and Dras, Mark},
    title     = {Grammatical Error Correction based on Domain Adaptation},
    booktitle = {Findings of the Proceedings of the 22nd China National Conference on Computational Linguistics},
    month     = aug,
    year      = {2023},
    address   = {Harbin, China},
    publisher = {Chinese Information Processing Society of China},
    url       = {https://aclanthology.org/2023.findings-ccl-1.12},
    pages     = {118--129},
    abstract  = {A common issue for grammatical error correction (GEC) is how to combine the native corpus and the corpus from English as Second Language (ESL) learners together to train the GEC model. For example, though can be trained by the native corpus only, a GEC classifier performed better when trained by the ESL corpus. However, due to the small quantity of the ESL corpus, the native corpus needs to be utilized as well to solve the data-limitation problem. Unlike some previous works which combined them in specific ways or using specific classifiers, we consider this as a domain adaptation problem and provide a common method. It is based on FRUSTRATINGLY EASY DOMAIN ADAPTATION (Daumé III, 2007), which augments the feature vectors directly to improve the classifier. We examine this method for correcting article errors along with a number of baseline systems, and prove that it performs effectively when using appropriate classifiers.},
    language  = {English},
}