From a7b075d85ca1862a9579b31cc9ce65534913f54e Mon Sep 17 00:00:00 2001 From: "S.Nakamatsu" <19329+snaka@users.noreply.github.com> Date: Fri, 2 Aug 2024 22:53:57 +0900 Subject: [PATCH 1/3] fix: Unstable search behaviour According to the GitHub API specification, the structure of a query consists of one or more KEYWORDs and one or more QUALIFIERs, as follows. ``` KEYWORD_1 KEYWORD_2 QUALIFIER_1 QUALIFIER_2 ``` The character limit of the query does not apply to QUALIFIERs, so the part excluding the QUALIFIERs must be validated. See-also: https://docs.github.com/en/rest/search/search?apiVersion=2022-11-28#limitations-on-query-length --- tagpr.go | 24 +++++++++++++++--------- tagpr_test.go | 10 ++++------ 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/tagpr.go b/tagpr.go index 7f9d587..48c7c97 100644 --- a/tagpr.go +++ b/tagpr.go @@ -606,11 +606,12 @@ func (tp *tagpr) generatenNextLabels(prIssues []*github.Issue) []string { return nextLabels } -func buildChunkSearchIssuesQuery(queryBase string, shasStr string) (chunkQueries []string) { - query := queryBase +func buildChunkSearchIssuesQuery(qualifiers string, shasStr string) (chunkQueries []string) { + // array of SHAs + keywords := make([]string, 0, 25) // Make bulk requests with multiple SHAs of the maximum possible length. // If multiple SHAs are specified, the issue search API will treat it like an OR search, - // and all the pull requests will be searched.u + // and all the pull requests will be searched. // This is difficult to read from the current documentation, but that is the current // behavior and GitHub support has responded that this is the spec. for _, sha := range strings.Split(shasStr, "\n") { @@ -618,16 +619,21 @@ func buildChunkSearchIssuesQuery(queryBase string, shasStr string) (chunkQueries continue } // Longer than 256 characters are not supported in the query. + // Note that the length limit does not include the qualifiers (ex. "repo:owner/repo", "is:close"). // ref. 
https://docs.github.com/en/rest/reference/search#limitations-on-query-length - if len(query)+1+len(sha) >= 256 { - chunkQueries = append(chunkQueries, query) - query = queryBase + // + // Also, from the results of the experiment, it is possible that when counting + // the number of characters in the keyword part, one space character is counted + // as three characters (possibly '%20'). + if len(strings.Join(keywords, "%20") + "%20" + sha) >= 256 { + chunkQueries = append(chunkQueries, qualifiers + " " + strings.Join(keywords, " ")) + keywords = make([]string, 0, 25) } - query += " " + sha + keywords = append(keywords, sha) } - if query != queryBase { - chunkQueries = append(chunkQueries, query) + if len(keywords) > 0 { + chunkQueries = append(chunkQueries, qualifiers + " " + strings.Join(keywords, " ")) } return chunkQueries diff --git a/tagpr_test.go b/tagpr_test.go index ec7e67a..b3c0f30 100644 --- a/tagpr_test.go +++ b/tagpr_test.go @@ -100,9 +100,9 @@ def3db8 c0fc143 `, []string{ - "repo:Songmu/tagpr is:pr is:closed 1a8bb97 1b7691b a9462b9 4d2b5e9 9ce4268 1eccbf8 1c3fbfc 968ade5 531c782 780bb71 6025fbf cc369ba a1f3e39 792bc85 3e3c4e1 37832de ac97702 d742186 217eb5d 0f900f7 5ef33d1 1d2ec15 2f37752 066ad7b 2e19b14 52b3706 f5134ae", - "repo:Songmu/tagpr is:pr is:closed ea39bbf 76b0630 ee3c6e6 2336be4 423a209 63caa74 3296052 3c98d78 86b8739 2264ec5 5c1d87b 4ffe09c 7c5d0de 3de9ed0 1b6b58c 2b643ec 53bf089 e8e96d5 3dac4b0 0605ba4 86cb76d 358c7c1 a139f86 33c16b6 c91f8ff a109671 b4029bd", - "repo:Songmu/tagpr is:pr is:closed f985b4f b74ef35 53d9ab3 6f57b07 0a84d90 43aa57d 75b6f79 def3db8 c0fc143", + "repo:Songmu/tagpr is:pr is:closed 1a8bb97 1b7691b a9462b9 4d2b5e9 9ce4268 1eccbf8 1c3fbfc 968ade5 531c782 780bb71 6025fbf cc369ba a1f3e39 792bc85 3e3c4e1 37832de ac97702 d742186 217eb5d 0f900f7 5ef33d1 1d2ec15 2f37752 066ad7b 2e19b14", + "repo:Songmu/tagpr is:pr is:closed 52b3706 f5134ae ea39bbf 76b0630 ee3c6e6 2336be4 423a209 63caa74 3296052 3c98d78 86b8739 2264ec5 5c1d87b 
4ffe09c 7c5d0de 3de9ed0 1b6b58c 2b643ec 53bf089 e8e96d5 3dac4b0 0605ba4 86cb76d 358c7c1 a139f86", + "repo:Songmu/tagpr is:pr is:closed 33c16b6 c91f8ff a109671 b4029bd f985b4f b74ef35 53d9ab3 6f57b07 0a84d90 43aa57d 75b6f79 def3db8 c0fc143", }, }, { @@ -132,11 +132,9 @@ d742186 2f37752 066ad7b 2e19b14 -52b3706 -f5134ae `, []string{ - "repo:Songmu/tagpr is:pr is:closed 1a8bb97 1b7691b a9462b9 4d2b5e9 9ce4268 1eccbf8 1c3fbfc 968ade5 531c782 780bb71 6025fbf cc369ba a1f3e39 792bc85 3e3c4e1 37832de ac97702 d742186 217eb5d 0f900f7 5ef33d1 1d2ec15 2f37752 066ad7b 2e19b14 52b3706 f5134ae", + "repo:Songmu/tagpr is:pr is:closed 1a8bb97 1b7691b a9462b9 4d2b5e9 9ce4268 1eccbf8 1c3fbfc 968ade5 531c782 780bb71 6025fbf cc369ba a1f3e39 792bc85 3e3c4e1 37832de ac97702 d742186 217eb5d 0f900f7 5ef33d1 1d2ec15 2f37752 066ad7b 2e19b14", }, }, } From bbcd48bba39ebe5157782c0f31c2aa852dac038f Mon Sep 17 00:00:00 2001 From: "S.Nakamatsu" <19329+snaka@users.noreply.github.com> Date: Wed, 7 Aug 2024 20:59:24 +0900 Subject: [PATCH 2/3] No escapes, but give margins for upper limits --- tagpr.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/tagpr.go b/tagpr.go index 48c7c97..d766c19 100644 --- a/tagpr.go +++ b/tagpr.go @@ -619,13 +619,14 @@ func buildChunkSearchIssuesQuery(qualifiers string, shasStr string) (chunkQuerie continue } // Longer than 256 characters are not supported in the query. - // Note that the length limit does not include the qualifiers (ex. "repo:owner/repo", "is:close"). // ref. https://docs.github.com/en/rest/reference/search#limitations-on-query-length // - // Also, from the results of the experiment, it is possible that when counting - // the number of characters in the keyword part, one space character is counted - // as three characters (possibly '%20'). 
- if len(strings.Join(keywords, "%20") + "%20" + sha) >= 256 { + // However, although not explicitly stated in the documentation, the space separating + // keywords is counted as one or more characters, so it is possible to exceed 256 + // characters if the text is filled to the very limit of 256 characters. + // For this reason, the maximum number of chars in the KEYWORD section is limited here to 200. + tempKeywords := append(keywords, sha) + if len(strings.Join(tempKeywords, " ")) >= 200 { chunkQueries = append(chunkQueries, qualifiers + " " + strings.Join(keywords, " ")) keywords = make([]string, 0, 25) } From 0b6638082e035b83d53948bd900a3ec3168886bf Mon Sep 17 00:00:00 2001 From: "S.Nakamatsu" <19329+snaka@users.noreply.github.com> Date: Wed, 7 Aug 2024 21:50:13 +0900 Subject: [PATCH 3/3] Declare the character limit as a constant --- tagpr.go | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/tagpr.go b/tagpr.go index d766c19..3bccb9a 100644 --- a/tagpr.go +++ b/tagpr.go @@ -607,6 +607,16 @@ func (tp *tagpr) generatenNextLabels(prIssues []*github.Issue) []string { } func buildChunkSearchIssuesQuery(qualifiers string, shasStr string) (chunkQueries []string) { + // Longer than 256 characters are not supported in the query. + // ref. https://docs.github.com/en/rest/reference/search#limitations-on-query-length + // + // However, although not explicitly stated in the documentation, the space separating + // keywords is counted as one or more characters, so it is possible to exceed 256 + // characters if the text is filled to the very limit of 256 characters. + // For this reason, the maximum number of chars in the KEYWORD section is limited to + // the following number. + const maxKeywordsLength = 200 + // array of SHAs keywords := make([]string, 0, 25) // Make bulk requests with multiple SHAs of the maximum possible length. 
@@ -618,15 +628,8 @@ func buildChunkSearchIssuesQuery(qualifiers string, shasStr string) (chunkQuerie if strings.TrimSpace(sha) == "" { continue } - // Longer than 256 characters are not supported in the query. - // ref. https://docs.github.com/en/rest/reference/search#limitations-on-query-length - // - // However, although not explicitly stated in the documentation, the space separating - // keywords is counted as one or more characters, so it is possible to exceed 256 - // characters if the text is filled to the very limit of 256 characters. - // For this reason, the maximum number of chars in the KEYWORD section is limited here to 200. tempKeywords := append(keywords, sha) - if len(strings.Join(tempKeywords, " ")) >= 200 { + if len(strings.Join(tempKeywords, " ")) >= maxKeywordsLength { chunkQueries = append(chunkQueries, qualifiers + " " + strings.Join(keywords, " ")) keywords = make([]string, 0, 25) }