From 5c4f2d6aff168303e28d478107c169c750bc9b39 Mon Sep 17 00:00:00 2001 From: Myzel394 <50424412+Myzel394@users.noreply.github.com> Date: Sat, 12 Oct 2024 15:23:43 +0200 Subject: [PATCH] feat(server): Add concept of virtual lines --- server/common/parser/strings.go | 54 ++++++- server/common/parser/strings_test.go | 26 ++++ server/common/strings.go | 23 +++ server/common/virtual-line.go | 205 +++++++++++++++++++++++++++ server/common/virtual-line_test.go | 182 ++++++++++++++++++++++++ server/go.mod | 8 +- server/go.sum | 2 + server/utils/strings.go | 64 +++++++++ server/utils/strings_test.go | 119 ++++++++++++++++ 9 files changed, 678 insertions(+), 5 deletions(-) create mode 100644 server/common/strings.go create mode 100644 server/common/virtual-line.go create mode 100644 server/common/virtual-line_test.go create mode 100644 server/utils/strings_test.go diff --git a/server/common/parser/strings.go b/server/common/parser/strings.go index 17aaee0..7e3f95b 100644 --- a/server/common/parser/strings.go +++ b/server/common/parser/strings.go @@ -1,16 +1,21 @@ package parser -import "strings" +import ( + "regexp" + "strings" +) type ParseFeatures struct { ParseDoubleQuotes bool ParseEscapedCharacters bool + TrimWhitespace bool Replacements *map[string]string } var FullFeatures = ParseFeatures{ ParseDoubleQuotes: true, ParseEscapedCharacters: true, + TrimWhitespace: false, Replacements: &map[string]string{}, } @@ -29,6 +34,10 @@ func ParseRawString( value = ParseReplacements(value, *features.Replacements) } + if features.TrimWhitespace { + value = TrimWhitespace(value, features.ParseDoubleQuotes) + } + // Parse double quotes if features.ParseDoubleQuotes { value = ParseDoubleQuotes(value) @@ -45,6 +54,49 @@ func ParseRawString( } } +var trimPattern = regexp.MustCompile(`\s+`) + +func TrimWhitespace( + raw string, + respectDoubleQuotes bool, +) string { + if !respectDoubleQuotes { + return trimPattern.ReplaceAllString( + strings.TrimSpace(raw), + " ", + ) + } + + value := raw + currentIndex := 0 + + for { + nextStart, found := findNextDoubleQuote(value, currentIndex) + + if found { + part := value[:nextStart] + value = strings.TrimSpace(part) + value[nextStart:] + } + + nextEnd, found := findNextDoubleQuote(value, nextStart+1) + + if !found { + break + } + + currentIndex = nextEnd + 1 + } + + // last part + if currentIndex < len(value) { + part := value[currentIndex:] + + value = value[:currentIndex] + strings.TrimSpace(part) + } + + return value +} + func ParseDoubleQuotes( raw string, ) string { diff --git a/server/common/parser/strings_test.go b/server/common/parser/strings_test.go index 0a05916..fad0959 100644 --- a/server/common/parser/strings_test.go +++ b/server/common/parser/strings_test.go @@ -54,6 +54,32 @@ func TestStringsMultipleQuotesFullFeatures( } } +func TestTrimWhitespaceNoQuotes( + t *testing.T, +) { + input := " hello world " + expected := "hello world" + + actual := TrimWhitespace(input, false) + + if expected != actual { + t.Errorf("Expected %v, got %v", expected, actual) + } +} + +func TestTrimWhitespaceQuotes( + t *testing.T, +) { + input := ` "hello world" ` + expected := `"hello world"` + + actual := TrimWhitespace(input, true) + + if expected != actual { + t.Errorf("Expected %v, got %v", expected, actual) + } +} + func TestStringsSimpleEscapedFullFeatures( t *testing.T, ) { diff --git a/server/common/strings.go b/server/common/strings.go new file mode 100644 index 0000000..75a0494 --- /dev/null +++ b/server/common/strings.go @@ -0,0 +1,23 @@ +package common + +var UnicodeWhitespace = map[rune]struct{}{ + '\u0020': {}, // Space + '\u0009': {}, // Horizontal tab + '\u000A': {}, // Line feed + '\u000B': {}, // Vertical tab + '\u000C': {}, // Form feed + '\u000D': {}, // Carriage return + '\u0085': {}, // Next line + '\u00A0': {}, // No-break space + '\u1680': {}, // Ogham space mark + '\u2000': {}, // En quad + '\u2001': {}, // Em quad + '\u2002': {}, // En space + '\u2003': {}, // Em space + '\u2004': {}, // Three-per-em space + '\u2005': {}, // Four-per-em space + '\u2006': {}, // Six-per-em space + '\u2007': {}, // Figure space + '\u2008': {}, // Punctuation space + '\u2009': {}, // Thin space +} diff --git a/server/common/virtual-line.go b/server/common/virtual-line.go new file mode 100644 index 0000000..27f7341 --- /dev/null +++ b/server/common/virtual-line.go @@ -0,0 +1,205 @@ +package common + +import ( + "config-lsp/utils" +) + +type VirtualLine struct { + // This is the true location of the text + // This ranges from the start of the first line and character + // to the end of the last line and character + LocationRange + + Parts []VirtualLinePart +} + +func (l VirtualLine) GetText() string { + text := "" + + for _, part := range l.Parts { + text += part.Text + } + + return text +} + +// GetSubset Get a subset of the virtual line starting from `start` and ending at `end` +func (l VirtualLine) GetSubset(start uint32, end uint32) VirtualLine { + parts := make([]VirtualLinePart, 0, 5) + currentIndex := uint32(0) + + for _, part := range l.Parts { + partStart := currentIndex + partEnd := currentIndex + uint32(len(part.Text)) + + if partEnd < start { + continue + } + + if start <= partEnd { + var rangeStart uint32 + var rangeEnd uint32 + + if start >= partStart { + rangeStart = start - partStart + } else { + rangeStart = 0 + } + + if end <= partEnd { + rangeEnd = end - partStart + } else { + rangeEnd = partEnd + } + + parts = append(parts, VirtualLinePart{ + LocationRange: LocationRange{ + Start: Location{ + Line: part.Start.Line, + Character: part.Start.Character + rangeStart, + }, + End: Location{ + Line: part.Start.Line, + Character: part.Start.Character + rangeEnd, + }, + }, + Text: part.Text[rangeStart:rangeEnd], + }) + } + + currentIndex = partEnd + + if currentIndex >= end { + break + } + } + + return VirtualLine{ + LocationRange: LocationRange{ + Start: parts[0].Start, + End: parts[len(parts)-1].End, + }, + Parts: parts, + } +} + +// ConvertRangeToTextRange Convert a given start and end range to a text range +// start and end are the start and end ranges of the virtual line +// This will return the start and end ranges of the actual text lines so that they +// match to the text of the virtual line +// The `start` and `end` are expected to be within the range of the virtual line +func (l VirtualLine) ConvertRangeToTextRange(start uint32, end uint32) []LocationRange { + virtualLine := l.GetSubset(start, end) + + ranges := make([]LocationRange, 0, 5) + + for _, part := range virtualLine.Parts { + ranges = append(ranges, part.LocationRange) + } + + return ranges +} + +func (l VirtualLine) AsTrimmed() VirtualLine { + if len(l.Parts) == 0 { + // There's nothing that could be trimmed, so we can also just + // return the original line, as it's identical + return l + } + + parts := make([]VirtualLinePart, len(l.Parts)) + + for index, part := range l.Parts { + parts[index] = part.AsTrimmed() + } + + return VirtualLine{ + LocationRange: LocationRange{ + Start: parts[0].Start, + End: parts[len(parts)-1].End, + }, + Parts: parts, + } +} + +type VirtualLinePart struct { + // This is the true location of the text + LocationRange + + Text string +} + +func (p VirtualLinePart) AsTrimmed() VirtualLinePart { + firstNonWhitespace := utils.FindFirstNonMatch(p.Text, UnicodeWhitespace, 0) + + if firstNonWhitespace == -1 { + // Empty line + return p + } + + lastNonWhitespace := utils.FindLastNonMatch(p.Text, UnicodeWhitespace, len(p.Text)-1) + + if lastNonWhitespace == -1 { + lastNonWhitespace = len(p.Text) - 1 + } + + return VirtualLinePart{ + LocationRange: LocationRange{ + Start: Location{ + Line: p.Start.Line, + Character: p.Start.Character + uint32(firstNonWhitespace), + }, + End: Location{ + Line: p.Start.Line, + Character: p.Start.Character + uint32(lastNonWhitespace) + 1, + }, + }, + Text: p.Text[firstNonWhitespace : lastNonWhitespace+1], + } +} + +func SplitIntoVirtualLines(input string) []VirtualLine { + stringLines := utils.SplitIntoVirtualLines(input) + + lines := make([]VirtualLine, 0, len(stringLines)) + + for rawLineNumber, line := range stringLines { + parts := make([]VirtualLinePart, 0) + + for virtualLineNumber, part := range line { + if part == "" { + continue + } + + lineNumber := uint32(rawLineNumber) + uint32(virtualLineNumber) + + parts = append(parts, VirtualLinePart{ + LocationRange: LocationRange{ + Start: Location{ + Line: lineNumber, + Character: 0, + }, + End: Location{ + Line: lineNumber, + Character: uint32(len(part)), + }, + }, + Text: part, + }) + } + + if len(parts) == 0 { + continue + } + + lines = append(lines, VirtualLine{ + LocationRange: LocationRange{ + Start: parts[0].Start, + End: parts[len(parts)-1].End, + }, + Parts: parts, + }) + } + + return lines +} diff --git a/server/common/virtual-line_test.go b/server/common/virtual-line_test.go new file mode 100644 index 0000000..c7aa748 --- /dev/null +++ b/server/common/virtual-line_test.go @@ -0,0 +1,182 @@ +package common + +import ( + "config-lsp/utils" + "github.com/google/go-cmp/cmp" + "testing" +) + +func TestSplitIntoVirtualLinesSimpleExample( + t *testing.T, +) { + input := utils.Dedent(` +Hello +World \ +how are you +`) + expected := []VirtualLine{ + { + LocationRange: LocationRange{ + Start: Location{ + Line: 0, + Character: 0, + }, + End: Location{ + Line: 0, + Character: 5, + }, + }, + Parts: []VirtualLinePart{ + { + LocationRange: LocationRange{ + Start: Location{ + Line: 0, + Character: 0, + }, + End: Location{ + Line: 0, + Character: 5, + }, + }, + Text: "Hello", + }, + }, + }, + { + LocationRange: LocationRange{ + Start: Location{ + Line: 1, + Character: 0, + }, + End: Location{ + Line: 2, + Character: 11, + }, + }, + Parts: []VirtualLinePart{ + { + LocationRange: LocationRange{ + Start: Location{ + Line: 1, + Character: 0, + }, + End: Location{ + Line: 1, + Character: 6, + }, + }, + Text: "World ", + }, + { + LocationRange: LocationRange{ + Start: Location{ + Line: 2, + Character: 0, + }, + End: Location{ + Line: 2, + Character: 11, + }, + }, + Text: "how are you", + }, + }, + }, + } + + lines := SplitIntoVirtualLines(input) + + if !cmp.Equal(expected, lines) { + t.Fatalf("Expected %v, got %v", expected, lines) + } + + expectedText := "World how are you" + actualText := lines[1].GetText() + + if expectedText != actualText { + t.Fatalf("Expected %v, got %v", expectedText, actualText) + } + + expectedText = "rld how are" + actualText = lines[1].GetText()[2:13] + + if expectedText != actualText { + t.Fatalf("Expected %v, got %v", expectedText, actualText) + } + + expectedRanges := []LocationRange{ + { + Start: Location{ + Line: 1, + Character: 2, + }, + End: Location{ + Line: 1, + Character: 6, + }, + }, + { + Start: Location{ + Line: 2, + Character: 0, + }, + End: Location{ + Line: 2, + Character: 7, + }, + }, + } + actualRanges := lines[1].ConvertRangeToTextRange(2, 13) + + if !cmp.Equal(expectedRanges, actualRanges) { + t.Fatalf("Expected %v, got %v", expectedRanges, actualRanges) + } +} + +func TestSplitIntoVirtualLinesIndentedExample( + t *testing.T, +) { + // 4 spaces + input := utils.Dedent(` + Hello +`) + expected := []VirtualLine{ + { + LocationRange: LocationRange{ + Start: Location{ + Line: 0, + Character: 4, + }, + End: Location{ + Line: 0, + Character: 9, + }, + }, + Parts: []VirtualLinePart{ + { + LocationRange: LocationRange{ + Start: Location{ + Line: 0, + Character: 4, + }, + End: Location{ + Line: 0, + Character: 9, + }, + }, + Text: "Hello", + }, + }, + }, + } + + actual := SplitIntoVirtualLines(input) + + for index, line := range actual { + actual[index] = line.AsTrimmed() + } + + if !cmp.Equal(expected, actual) { + t.Fatalf("Expected %v, got %v", expected, actual) + } +} diff --git a/server/go.mod b/server/go.mod index 8c84359..cb117fe 100644 --- a/server/go.mod +++ b/server/go.mod @@ -3,15 +3,16 @@ module config-lsp go 1.22.5 require ( + github.com/antlr4-go/antlr/v4 v4.13.1 + github.com/emirpasic/gods v1.18.1 github.com/tliron/commonlog v0.2.17 github.com/tliron/glsp v0.2.2 + golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 + github.com/google/go-cmp v0.6.0 ) require ( - github.com/antlr4-go/antlr/v4 v4.13.1 // indirect github.com/aymanbagabas/go-osc52/v2 v2.0.1 // indirect - github.com/emirpasic/gods v1.18.1 // indirect - github.com/google/go-cmp v0.6.0 // indirect github.com/gorilla/websocket v1.5.3 // indirect github.com/iancoleman/strcase v0.3.0 // indirect github.com/k0kubun/pp v3.0.1+incompatible // indirect @@ -28,7 +29,6 @@ require ( github.com/sourcegraph/jsonrpc2 v0.2.0 // indirect github.com/tliron/kutil v0.3.24 // indirect golang.org/x/crypto v0.25.0 // indirect - golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect golang.org/x/sys v0.22.0 // indirect golang.org/x/term v0.22.0 // indirect ) diff --git a/server/go.sum b/server/go.sum index 100a1f7..70ef9af 100644 --- a/server/go.sum +++ b/server/go.sum @@ -4,6 +4,8 @@ github.com/aymanbagabas/go-osc52/v2 v2.0.1 h1:HwpRHbFMcZLEVr42D4p7XBqjyuxQH5SMiE github.com/aymanbagabas/go-osc52/v2 v2.0.1/go.mod h1:uYgXzlJ7ZpABp8OJ+exZzJJhRNQ2ASbcXHWsFqH8hp8= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= +github.com/fatih/color v1.17.0 h1:GlRw1BRJxkpqUCBKzKOw098ed57fEsKeNjpTe3cSjK4= +github.com/fatih/color v1.17.0/go.mod h1:YZ7TlrGPkiz6ku9fK3TLD/pl3CpsiFyu8N92HLgmosI= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/gorilla/websocket v1.4.1/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= diff --git a/server/utils/strings.go b/server/utils/strings.go index 3f7f851..192f34a 100644 --- a/server/utils/strings.go +++ b/server/utils/strings.go @@ -76,3 +76,67 @@ func AllIndexes(s string, sub string) []int { return indexes } + +func FindFirstNonMatch(s string, substr map[rune]struct{}, startIndex int) int { + for index := startIndex; index < len(s); index++ { + if _, found := substr[rune(s[index])]; !found { + return index + } + } + + return -1 +} + +func FindLastNonMatch(s string, substr map[rune]struct{}, startIndex int) int { + for index := startIndex; index >= 0; index-- { + if _, found := substr[rune(s[index])]; !found { + return index + } + } + + return -1 +} + +var lineContinuationPattern = regexp.MustCompile(`\\\s*$`) + +// SplitSmartlyIntoLines Split a string into lines while respecting "\" as a line continuation character +// This function is useful for parsing configuration files +// You will need to handle whitespace trimming yourself +// For example, the following input: +// ``` +// key1 = value1 +// +// key2 = value2 \ +// value3 +// +// key3 = value4 +// ``` +// Will be split into: +// ```go +// +// [][]string{ +// []string{"key1 = value1"}, +// []string{"key2 = value2 ", " value3"}, +// []string{"key3 = value4"}, +// } +func SplitIntoVirtualLines(input string) [][]string { + lines := make([][]string, 0, len(input)) + currentLine := make([]string, 0, 1) + + for _, line := range SplitIntoLines(input) { + if lineContinuationPattern.MatchString(line) { + currentLine = append(currentLine, line[:len(line)-1]) + continue + } + + currentLine = append(currentLine, line) + lines = append(lines, currentLine) + currentLine = make([]string, 0, 1) + } + + if len(currentLine) > 0 { + lines = append(lines, currentLine) + } + + return lines +} diff --git a/server/utils/strings_test.go b/server/utils/strings_test.go new file mode 100644 index 0000000..81b6287 --- /dev/null +++ b/server/utils/strings_test.go @@ -0,0 +1,119 @@ +package utils + +import ( + "github.com/google/go-cmp/cmp" + "testing" +) + +func TestSplitIntoVirtualLinesSimpleExample( + t *testing.T, +) { + input := Dedent(` +Hello +World\ +how are you +`) + expected := [][]string{ + {"Hello"}, + {"World", "how are you"}, + } + + actual := SplitIntoVirtualLines(input) + + if cmp.Equal(expected, actual) { + t.Fatalf("Expected %v, got %v", expected, actual) + } +} + +func TestSplitIntoVirtualLinesEmptyString(t *testing.T) { + input := "" + expected := [][]string{ + {""}, + } + + actual := SplitIntoVirtualLines(input) + + if !cmp.Equal(expected, actual) { + t.Fatalf("Expected %v, got %v", expected, actual) + } +} + +func TestSplitIntoVirtualLinesSingleLine(t *testing.T) { + input := Dedent(` + Hello`) + expected := [][]string{ + {" Hello"}, + } + + actual := SplitIntoVirtualLines(input) + + if !cmp.Equal(expected, actual) { + t.Fatalf("Expected %v, got %v", expected, actual) + } +} + +func TestSplitIntoVirtualLinesMultipleLinesWithoutContinuation(t *testing.T) { + input := Dedent(` + Hello + World + How are you`) + expected := [][]string{ + {" Hello"}, + {" World"}, + {" How are you"}, + } + + actual := SplitIntoVirtualLines(input) + + if !cmp.Equal(expected, actual) { + t.Fatalf("Expected %v, got %v", expected, actual) + } +} + +func TestSplitIntoVirtualLinesMultipleLinesWithContinuation(t *testing.T) { + input := Dedent(` + Hello \ +World \ +How are you`) + expected := [][]string{ + {" Hello ", "World ", "How are you"}, + } + + actual := SplitIntoVirtualLines(input) + + if !cmp.Equal(expected, actual) { + t.Fatalf("Expected %v, got %v", expected, actual) + } +} + +func TestSplitIntoVirtualLinesMixedContinuation(t *testing.T) { + input := Dedent(` +Hello +World\ + How are you`) + expected := [][]string{ + {"Hello"}, + {"World", " How are you"}, + } + + actual := SplitIntoVirtualLines(input) + + if !cmp.Equal(expected, actual) { + t.Fatalf("Expected %v, got %v", expected, actual) + } +} + +func TestSplitIntoVirtualLinesTrailingContinuation(t *testing.T) { + input := Dedent(` +Hello\ + `) + expected := [][]string{ + {"Hello", " "}, + } + + actual := SplitIntoVirtualLines(input) + + if !cmp.Equal(expected, actual) { + t.Fatalf("Expected %v, got %v", expected, actual) + } +}