-
Notifications
You must be signed in to change notification settings - Fork 1.3k
match beginning and end of line correctly #3575
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,10 +2,56 @@ package buffer | |
|
|
||
| import ( | ||
| "regexp" | ||
| "unicode/utf8" | ||
|
|
||
| "github.com/zyedidia/micro/v2/internal/util" | ||
| ) | ||
|
|
||
| // We want "^" and "$" to match only the beginning/end of a line, not the | ||
| // beginning/end of the search region if it is in the middle of a line. | ||
| // In that case we use padded regexps to require a rune before or after | ||
| // the match. (This also affects other empty-string patterns like "\\b".) | ||
| // The following two flags indicate the padding used. | ||
| const ( | ||
| padStart = 1 << iota | ||
| padEnd | ||
| ) | ||
|
|
||
| func findLineParams(b *Buffer, start, end Loc, i int, r *regexp.Regexp) ([]byte, int, int, *regexp.Regexp) { | ||
| l := b.LineBytes(i) | ||
| charpos := 0 | ||
| padMode := 0 | ||
|
|
||
| if i == end.Y { | ||
| nchars := util.CharacterCount(l) | ||
| end.X = util.Clamp(end.X, 0, nchars) | ||
| if end.X < nchars { | ||
| l = util.SliceStart(l, end.X+1) | ||
| padMode |= padEnd | ||
| } | ||
| } | ||
|
|
||
| if i == start.Y { | ||
| nchars := util.CharacterCount(l) | ||
| start.X = util.Clamp(start.X, 0, nchars) | ||
| if start.X > 0 { | ||
| charpos = start.X - 1 | ||
| l = util.SliceEnd(l, charpos) | ||
| padMode |= padStart | ||
| } | ||
| } | ||
|
|
||
| if padMode == padStart { | ||
| r = regexp.MustCompile(".(?:" + r.String() + ")") | ||
| } else if padMode == padEnd { | ||
| r = regexp.MustCompile("(?:" + r.String() + ").") | ||
| } else if padMode == padStart|padEnd { | ||
| r = regexp.MustCompile(".(?:" + r.String() + ").") | ||
| } | ||
|
|
||
| return l, charpos, padMode, r | ||
| } | ||
|
|
||
| func (b *Buffer) findDown(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) { | ||
| lastcn := util.CharacterCount(b.LineBytes(b.LinesNum() - 1)) | ||
| if start.Y > b.LinesNum()-1 { | ||
|
|
@@ -22,30 +68,19 @@ func (b *Buffer) findDown(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) { | |
| } | ||
|
|
||
| for i := start.Y; i <= end.Y; i++ { | ||
| l := b.LineBytes(i) | ||
| charpos := 0 | ||
|
|
||
| if i == start.Y && start.Y == end.Y { | ||
| nchars := util.CharacterCount(l) | ||
| start.X = util.Clamp(start.X, 0, nchars) | ||
| end.X = util.Clamp(end.X, 0, nchars) | ||
| l = util.SliceStart(l, end.X) | ||
| l = util.SliceEnd(l, start.X) | ||
| charpos = start.X | ||
| } else if i == start.Y { | ||
| nchars := util.CharacterCount(l) | ||
| start.X = util.Clamp(start.X, 0, nchars) | ||
| l = util.SliceEnd(l, start.X) | ||
| charpos = start.X | ||
| } else if i == end.Y { | ||
| nchars := util.CharacterCount(l) | ||
| end.X = util.Clamp(end.X, 0, nchars) | ||
| l = util.SliceStart(l, end.X) | ||
| } | ||
| l, charpos, padMode, rPadded := findLineParams(b, start, end, i, r) | ||
|
|
||
| match := r.FindIndex(l) | ||
| match := rPadded.FindIndex(l) | ||
|
|
||
| if match != nil { | ||
| if padMode&padStart != 0 { | ||
| _, size := utf8.DecodeRune(l[match[0]:]) | ||
| match[0] += size | ||
| } | ||
| if padMode&padEnd != 0 { | ||
| _, size := utf8.DecodeLastRune(l[:match[1]]) | ||
| match[1] -= size | ||
| } | ||
| start := Loc{charpos + util.RunePos(l, match[0]), i} | ||
| end := Loc{charpos + util.RunePos(l, match[1]), i} | ||
| return [2]Loc{start, end}, true | ||
|
|
@@ -70,39 +105,39 @@ func (b *Buffer) findUp(r *regexp.Regexp, start, end Loc) ([2]Loc, bool) { | |
| } | ||
|
|
||
| for i := end.Y; i >= start.Y; i-- { | ||
| l := b.LineBytes(i) | ||
| charpos := 0 | ||
|
|
||
| if i == start.Y && start.Y == end.Y { | ||
| nchars := util.CharacterCount(l) | ||
| start.X = util.Clamp(start.X, 0, nchars) | ||
| end.X = util.Clamp(end.X, 0, nchars) | ||
| l = util.SliceStart(l, end.X) | ||
| l = util.SliceEnd(l, start.X) | ||
| charpos = start.X | ||
| } else if i == start.Y { | ||
| nchars := util.CharacterCount(l) | ||
| start.X = util.Clamp(start.X, 0, nchars) | ||
| l = util.SliceEnd(l, start.X) | ||
| charpos = start.X | ||
| } else if i == end.Y { | ||
| nchars := util.CharacterCount(l) | ||
| end.X = util.Clamp(end.X, 0, nchars) | ||
| l = util.SliceStart(l, end.X) | ||
| } | ||
|
|
||
| allMatches := r.FindAllIndex(l, -1) | ||
| charCount := util.CharacterCount(b.LineBytes(i)) | ||
| from := Loc{0, i}.Clamp(start, end) | ||
| to := Loc{charCount, i}.Clamp(start, end) | ||
|
|
||
| allMatches := b.findAll(r, from, to) | ||
| if allMatches != nil { | ||
| match := allMatches[len(allMatches)-1] | ||
| start := Loc{charpos + util.RunePos(l, match[0]), i} | ||
| end := Loc{charpos + util.RunePos(l, match[1]), i} | ||
| return [2]Loc{start, end}, true | ||
| return [2]Loc{match[0], match[1]}, true | ||
| } | ||
| } | ||
| return [2]Loc{}, false | ||
| } | ||
|
|
||
| func (b *Buffer) findAll(r *regexp.Regexp, start, end Loc) [][2]Loc { | ||
| var matches [][2]Loc | ||
| loc := start | ||
| for { | ||
| match, found := b.findDown(r, loc, end) | ||
| if !found { | ||
| break | ||
| } | ||
| matches = append(matches, match) | ||
| if match[0] != match[1] { | ||
| loc = match[1] | ||
| } else if match[1] != end { | ||
| loc = match[1].Move(1, b) | ||
| } else { | ||
| break | ||
| } | ||
| } | ||
| return matches | ||
| } | ||
|
|
||
| // FindNext finds the next occurrence of a given string in the buffer | ||
| // It returns the start and end location of the match (if found) and | ||
| // a boolean indicating if it was found | ||
|
|
@@ -146,53 +181,58 @@ func (b *Buffer) FindNext(s string, start, end, from Loc, down bool, useRegex bo | |
| } | ||
|
|
||
| // ReplaceRegex replaces all occurrences of 'search' with 'replace' in the given area | ||
| // and returns the number of replacements made and the number of runes | ||
| // and returns the number of replacements made and the number of characters | ||
| // added or removed on the last line of the range | ||
| func (b *Buffer) ReplaceRegex(start, end Loc, search *regexp.Regexp, replace []byte, captureGroups bool) (int, int) { | ||
| if start.GreaterThan(end) { | ||
| start, end = end, start | ||
| } | ||
|
|
||
| netrunes := 0 | ||
|
|
||
| charsEnd := util.CharacterCount(b.LineBytes(end.Y)) | ||
| found := 0 | ||
| var deltas []Delta | ||
|
|
||
| for i := start.Y; i <= end.Y; i++ { | ||
| l := b.lines[i].data | ||
| charpos := 0 | ||
|
|
||
| if start.Y == end.Y && i == start.Y { | ||
| l = util.SliceStart(l, end.X) | ||
| l = util.SliceEnd(l, start.X) | ||
| charpos = start.X | ||
| } else if i == start.Y { | ||
| l = util.SliceEnd(l, start.X) | ||
| charpos = start.X | ||
| } else if i == end.Y { | ||
| l = util.SliceStart(l, end.X) | ||
| } | ||
| newText := search.ReplaceAllFunc(l, func(in []byte) []byte { | ||
| var result []byte | ||
| if captureGroups { | ||
| for _, submatches := range search.FindAllSubmatchIndex(in, -1) { | ||
| result = search.Expand(result, replace, in, submatches) | ||
| l := b.LineBytes(i) | ||
| charCount := util.CharacterCount(l) | ||
| if (i == start.Y && start.X > 0) || (i == end.Y && end.X < charCount) { | ||
| // This replacement code works in general, but it creates a separate | ||
| // modification for each match. We only use it for the first and last | ||
| // lines, which may use padded regexps | ||
|
|
||
| from := Loc{0, i}.Clamp(start, end) | ||
| to := Loc{charCount, i}.Clamp(start, end) | ||
| matches := b.findAll(search, from, to) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Still, what is the advantage of using
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. One could do that. I wrote the code with the idea in mind to have some public
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Somehow I missed the email notification about your comments a week ago...
Still, what for? Why not keep things simple?
A single line may be huge. (Although that's an edge case, and currently micro handles huge lines very poorly anyway, for unrelated reasons, due to the dumb data structure with O(n) access, which IMO we should fix some day, because that's a shame.) But my point is not so much about performance but about simplicity. We have a chance to try to make the logic not just more correct but at the same time simpler, by simply calling
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If we want to call Apart from this list reversal, this would indeed lead to fairly simple code. Padded regexps would be compiled for almost every call to Please let me know how you want to proceed. EDIT: Modifying the processing of deltas should be easy to do (in
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Indeed, good point. Thanks for explanations. I've analyzed all this in more detail and now it seems to me that the current approach in this PR is already as simple and practical as it can be without further refactoring of EventHandler (which we can do separately, if we want to).
Yeah, that's also an argument.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Would you be interested in a separate PR that modifies
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
If it goes well, why not?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's now the first part of #3658. |
||
| found += len(matches) | ||
|
|
||
| for j := len(matches) - 1; j >= 0; j-- { | ||
| // if we counted upwards, the different deltas would interfere | ||
| match := matches[j] | ||
| var newText []byte | ||
| if captureGroups { | ||
| newText = search.ReplaceAll(b.Substr(match[0], match[1]), replace) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I've noticed that because of this
This "fixes" both problems for me: diff --git a/internal/buffer/search.go b/internal/buffer/search.go
index a48e1f87..f2e645e3 100644
--- a/internal/buffer/search.go
+++ b/internal/buffer/search.go
@@ -209,7 +209,7 @@ func (b *Buffer) ReplaceRegex(start, end Loc, search *regexp.Regexp, replace []b
// if we counted upwards, the different deltas would interfere
match := matches[j]
var newText []byte
- if captureGroups {
+ if captureGroups && match[0] != match[1] {
newText = search.ReplaceAll(b.Substr(match[0], match[1]), replace)
} else {
newText = replace
but this is an incorrect hack (at least because we should not disable expanding I'm not sure how to fix it correctly.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This bug is in master already. The problem is to apply the given regexp to The good news is that this problem does not seem to exist in my draft PR #3658. There, instead of using For EDIT: I think that #3658 works for interactive replaces, too. The reason is that the added padding makes the problem of an empty match disappear, and then
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, for sure this is not a regression.
Cool. Yeah I see, to fix the problem we just need to I'll take a look at #3658 once I have some time. In the meantime I think we can merge this PR now anyway. |
||
| } else { | ||
| newText = replace | ||
| } | ||
| } else { | ||
| result = replace | ||
| deltas = append(deltas, Delta{newText, match[0], match[1]}) | ||
| } | ||
| found++ | ||
| if i == end.Y { | ||
| netrunes += util.CharacterCount(result) - util.CharacterCount(in) | ||
| } | ||
| return result | ||
| }) | ||
|
|
||
| from := Loc{charpos, i} | ||
| to := Loc{charpos + util.CharacterCount(l), i} | ||
|
|
||
| deltas = append(deltas, Delta{newText, from, to}) | ||
| } else { | ||
| newLine := search.ReplaceAllFunc(l, func(in []byte) []byte { | ||
| found++ | ||
| var result []byte | ||
| if captureGroups { | ||
| match := search.FindSubmatchIndex(in) | ||
| result = search.Expand(result, replace, in, match) | ||
| } else { | ||
| result = replace | ||
| } | ||
| return result | ||
| }) | ||
| deltas = append(deltas, Delta{newLine, Loc{0, i}, Loc{charCount, i}}) | ||
| } | ||
| } | ||
|
|
||
| b.MultipleReplace(deltas) | ||
|
|
||
| return found, netrunes | ||
| return found, util.CharacterCount(b.LineBytes(end.Y)) - charsEnd | ||
| } | ||
Uh oh!
There was an error while loading. Please reload this page.