From ec521c3c91dad2934313a95034f2d486f910085f Mon Sep 17 00:00:00 2001 From: Paul Payne Date: Sat, 11 Oct 2025 17:06:14 +0000 Subject: [PATCH] Initial commit. --- .gitignore | 24 + LICENSE | 661 ++++++++++++++++++++++++ README.md | 168 +++++++ go.mod | 8 + go.sum | 6 + internal/api/v1/handlers.go | 466 +++++++++++++++++ internal/api/v1/handlers_apps.go | 206 ++++++++ internal/api/v1/handlers_backup.go | 110 ++++ internal/api/v1/handlers_cluster.go | 331 ++++++++++++ internal/api/v1/handlers_config.go | 76 +++ internal/api/v1/handlers_node.go | 317 ++++++++++++ internal/api/v1/handlers_operations.go | 156 ++++++ internal/api/v1/handlers_pxe.go | 141 ++++++ internal/api/v1/handlers_services.go | 424 ++++++++++++++++ internal/api/v1/handlers_utilities.go | 151 ++++++ internal/apps/apps.go | 528 +++++++++++++++++++ internal/backup/backup.go | 494 ++++++++++++++++++ internal/cluster/cluster.go | 518 +++++++++++++++++++ internal/config/config.go | 168 +++++++ internal/config/manager.go | 167 +++++++ internal/context/context.go | 140 ++++++ internal/context/context_test.go | 100 ++++ internal/data/paths.go | 105 ++++ internal/discovery/discovery.go | 247 +++++++++ internal/dnsmasq/config.go | 73 +++ internal/instance/instance.go | 251 ++++++++++ internal/instance/instance_test.go | 176 +++++++ internal/node/node.go | 668 +++++++++++++++++++++++++ internal/operations/broadcaster.go | 75 +++ internal/operations/operations.go | 255 ++++++++++ internal/pxe/pxe.go | 220 ++++++++ internal/secrets/secrets.go | 166 ++++++ internal/secrets/secrets_test.go | 121 +++++ internal/services/broadcast_writer.go | 66 +++ internal/services/manifest.go | 122 +++++ internal/services/services.go | 631 +++++++++++++++++++++++ internal/storage/storage.go | 110 ++++ internal/storage/storage_test.go | 107 ++++ internal/tools/context.go | 37 ++ internal/tools/gomplate.go | 111 ++++ internal/tools/kubectl.go | 33 ++ internal/tools/talosctl.go | 362 ++++++++++++++ internal/tools/yq.go | 133 +++++ internal/utilities/utilities.go | 300 +++++++++++ main.go | 69 +++ 45 files changed, 9798 insertions(+) create mode 100644 .gitignore create mode 100644 LICENSE create mode 100644 README.md create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/api/v1/handlers.go create mode 100644 internal/api/v1/handlers_apps.go create mode 100644 internal/api/v1/handlers_backup.go create mode 100644 internal/api/v1/handlers_cluster.go create mode 100644 internal/api/v1/handlers_config.go create mode 100644 internal/api/v1/handlers_node.go create mode 100644 internal/api/v1/handlers_operations.go create mode 100644 internal/api/v1/handlers_pxe.go create mode 100644 internal/api/v1/handlers_services.go create mode 100644 internal/api/v1/handlers_utilities.go create mode 100644 internal/apps/apps.go create mode 100644 internal/backup/backup.go create mode 100644 internal/cluster/cluster.go create mode 100644 internal/config/config.go create mode 100644 internal/config/manager.go create mode 100644 internal/context/context.go create mode 100644 internal/context/context_test.go create mode 100644 internal/data/paths.go create mode 100644 internal/discovery/discovery.go create mode 100644 internal/dnsmasq/config.go create mode 100644 internal/instance/instance.go create mode 100644 internal/instance/instance_test.go create mode 100644 internal/node/node.go create mode 100644 internal/operations/broadcaster.go create mode 100644 internal/operations/operations.go create mode 100644 internal/pxe/pxe.go create mode 100644 
internal/secrets/secrets.go
 create mode 100644 internal/secrets/secrets_test.go
 create mode 100644 internal/services/broadcast_writer.go
 create mode 100644 internal/services/manifest.go
 create mode 100644 internal/services/services.go
 create mode 100644 internal/storage/storage.go
 create mode 100644 internal/storage/storage_test.go
 create mode 100644 internal/tools/context.go
 create mode 100644 internal/tools/gomplate.go
 create mode 100644 internal/tools/kubectl.go
 create mode 100644 internal/tools/talosctl.go
 create mode 100644 internal/tools/yq.go
 create mode 100644 internal/utilities/utilities.go
 create mode 100644 main.go

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..80179f1
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,24 @@
+# Daemon build artifacts
+build/
+dist/
+wildd
+*.deb
+__debug*
+
+# Go build cache
+*.o
+*.a
+*.so
+__debug*
+
+# Test binary, built with `go test -c`
+*.test
+
+# Output of the go coverage tool
+*.out
+*.cover
+*.coverage
+
+# Go workspace file
+go.work
+go.work.sum
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0ad25db
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,661 @@
+                    GNU AFFERO GENERAL PUBLIC LICENSE
+                       Version 3, 19 November 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU Affero General Public License is a free, copyleft license for
+software and other kinds of works, specifically designed to ensure
+cooperation with the community in the case of network server software.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+our General Public Licenses are intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  Developers that use our General Public Licenses protect your rights
+with two steps: (1) assert copyright on the software, and (2) offer
+you this License which gives you legal permission to copy, distribute
+and/or modify the software.
+
+  A secondary benefit of defending all users' freedom is that
+improvements made in alternate versions of the program, if they
+receive widespread use, become available for other developers to
+incorporate.  Many developers of free software are heartened and
+encouraged by the resulting cooperation.  However, in the case of
+software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and
+letting the public access it on a server without ever releasing its
+source code to the public.
+
+  The GNU Affero General Public License is designed specifically to
+ensure that, in such cases, the modified source code becomes available
+to the community.  It requires the operator of a network server to
+provide the source code of the modified version running there to the
+users of that server.
Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. 
A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. 
If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). 
To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. 
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published
+    by the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If your software can interact with users remotely through a computer
+network, you should also make sure that it provides a way for users to
+get its source.  For example, if your program is a web application, its
+interface could display a "Source" link that leads users to an archive
+of the code.  There are many ways you could offer source, and different
+solutions will be better for different programs; see section 13 for the
+specific requirements.
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU AGPL, see
+<https://www.gnu.org/licenses/>.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..03c686e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,168 @@
+# Wild Central Daemon
+
+The Wild Central Daemon is a lightweight service that runs on a local machine (e.g., a Raspberry Pi) to manage Wild Cloud instances on the local network. It provides an interface for users to interact with and manage their Wild Cloud environments.
+
+## Development
+
+```bash
+make dev
+```
+
+## Usage
+
+### Batch Configuration Update Endpoint
+
+#### Overview
+
+The batch configuration update endpoint allows updating multiple configuration values in a single atomic request.
+
+#### Endpoint
+
+```
+PATCH /api/v1/instances/{name}/config
+```
+
+#### Request Format
+
+```json
+{
+  "updates": [
+    {"path": "string", "value": "any"},
+    {"path": "string", "value": "any"}
+  ]
+}
+```
+
+#### Response Format
+
+Success (200 OK):
+```json
+{
+  "message": "Configuration updated successfully",
+  "updated": 3
+}
+```
+
+Error (400 Bad Request / 404 Not Found / 500 Internal Server Error):
+```json
+{
+  "error": "error message"
+}
+```
+
+#### Usage Examples
+
+##### Example 1: Update Basic Configuration Values
+
+```bash
+curl -X PATCH http://localhost:8080/api/v1/instances/my-cloud/config \
+  -H "Content-Type: application/json" \
+  -d '{
+    "updates": [
+      {"path": "baseDomain", "value": "example.com"},
+      {"path": "domain", "value": "wild.example.com"},
+      {"path": "internalDomain", "value": "int.wild.example.com"}
+    ]
+  }'
+```
+
+Response:
+```json
+{
+  "message": "Configuration updated successfully",
+  "updated": 3
+}
+```
+
+##### Example 2: Update Nested Configuration Values
+
+```bash
+curl -X PATCH http://localhost:8080/api/v1/instances/my-cloud/config \
+  -H "Content-Type: application/json" \
+  -d '{
+    "updates": [
+      {"path": "cluster.name", "value": "prod-cluster"},
+      {"path": "cluster.loadBalancerIp", "value": "192.168.1.100"},
+      {"path": "cluster.ipAddressPool", "value": "192.168.1.100-192.168.1.200"}
+    ]
+  }'
+```
+
+##### Example 3: Update Array Values
+
+```bash
+curl -X PATCH http://localhost:8080/api/v1/instances/my-cloud/config \
+  -H "Content-Type: application/json" \
+  -d '{
+    "updates": [
+      {"path": "cluster.nodes.activeNodes[0]", "value": "node-1"},
+      {"path": "cluster.nodes.activeNodes[1]", "value": "node-2"}
+    ]
+  }'
+```
+
+##### Example 4: Error Handling - Invalid Instance
+
+```bash
+curl -X PATCH http://localhost:8080/api/v1/instances/nonexistent/config \
+  -H "Content-Type: application/json" \
+  -d '{
+    "updates": [
+      {"path": "baseDomain", "value": "example.com"}
+    ]
+  }'
+```
+
+Response (404):
+```json
+{
+  "error": "Instance not found: instance nonexistent does not exist"
+}
+```
+
+##### Example 5: Error Handling - Empty Updates
+
+```bash
+curl -X PATCH http://localhost:8080/api/v1/instances/my-cloud/config \
+  -H "Content-Type: application/json" \
+  -d '{
+    "updates": []
+  }'
+```
+
+Response (400):
+```json
+{
+  "error": "updates array is required and cannot be empty"
+}
+```
+
+##### Example 6: Error Handling - Missing Path
+
+```bash
+curl -X PATCH http://localhost:8080/api/v1/instances/my-cloud/config \
+  -H "Content-Type: application/json" \
+  -d '{
+    "updates": [
+      {"path": "", "value": "example.com"}
+    ]
+  }'
+```
+
+Response (400):
+```json
+{
+  "error": "update[0]: path is required"
+}
+```
+
+#### Configuration Path Syntax
+
+The `path` field uses YAML path syntax as implemented by the `yq` tool:
+
+- Simple fields: `baseDomain`
+- Nested fields: `cluster.name`
+- Array elements: `cluster.nodes.activeNodes[0]`
+- Array append: `cluster.nodes.activeNodes[+]`
+
+Refer to the yq documentation for advanced path syntax. A Go client sketch follows below.
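+
+#### Client Example (Go)
+
+The sketch below shows one way to call the batch endpoint from Go. It assumes the daemon is reachable at `localhost:8080` and that an instance named `my-cloud` exists, as in the curl examples above; the `ConfigUpdate` type is illustrative and not part of the daemon's public packages.
+
+```go
+package main
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"log"
+	"net/http"
+)
+
+// ConfigUpdate mirrors one entry of the "updates" array accepted by
+// PATCH /api/v1/instances/{name}/config. (Illustrative client-side type.)
+type ConfigUpdate struct {
+	Path  string      `json:"path"`
+	Value interface{} `json:"value"`
+}
+
+func main() {
+	// Build the batch request body: one nested-field update and one
+	// array-element update, applied atomically by the daemon.
+	payload := map[string][]ConfigUpdate{
+		"updates": {
+			{Path: "cluster.name", Value: "prod-cluster"},
+			{Path: "cluster.nodes.activeNodes[0]", Value: "node-1"},
+		},
+	}
+	body, err := json.Marshal(payload)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	// Assumes the daemon listens on localhost:8080 and the instance
+	// "my-cloud" exists.
+	req, err := http.NewRequest(http.MethodPatch,
+		"http://localhost:8080/api/v1/instances/my-cloud/config",
+		bytes.NewReader(body))
+	if err != nil {
+		log.Fatal(err)
+	}
+	req.Header.Set("Content-Type", "application/json")
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer resp.Body.Close()
+
+	var result map[string]interface{}
+	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
+		log.Fatal(err)
+	}
+	fmt.Printf("HTTP %d: %v\n", resp.StatusCode, result)
+}
+```
+
+A 200 response carries the `message` and `updated` fields shown above; error responses (400/404/500) carry an `error` field instead.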
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..2af3563
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,8 @@
+module github.com/wild-cloud/wild-central/daemon
+
+go 1.24
+
+require (
+	github.com/gorilla/mux v1.8.1
+	gopkg.in/yaml.v3 v3.0.1
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..3e12cf5
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,6 @@
+github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY=
+github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/internal/api/v1/handlers.go b/internal/api/v1/handlers.go
new file mode 100644
index 0000000..2b99613
--- /dev/null
+++ b/internal/api/v1/handlers.go
@@ -0,0 +1,466 @@
+package v1
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"path/filepath"
+	"time"
+
+	"github.com/gorilla/mux"
+	"gopkg.in/yaml.v3"
+
+	"github.com/wild-cloud/wild-central/daemon/internal/config"
+	"github.com/wild-cloud/wild-central/daemon/internal/context"
+	"github.com/wild-cloud/wild-central/daemon/internal/instance"
+	"github.com/wild-cloud/wild-central/daemon/internal/operations"
+	"github.com/wild-cloud/wild-central/daemon/internal/secrets"
+)
+
+// API holds all dependencies for API handlers
+type API struct {
+	dataDir       string
+	directoryPath string // Path to Wild Cloud Directory
+	appsDir       string
+	config        *config.Manager
+	secrets       *secrets.Manager
+	context       *context.Manager
+	instance      *instance.Manager
+	broadcaster   *operations.Broadcaster // SSE broadcaster for operation output
+}
+
+// NewAPI creates a new API handler with all dependencies
+func NewAPI(dataDir, directoryPath string) (*API, error) {
+	// Ensure base directories exist
+	instancesDir := filepath.Join(dataDir, "instances")
+	if err := os.MkdirAll(instancesDir, 0755); err != nil {
+		return nil, fmt.Errorf("failed to create instances directory: %w", err)
+	}
+
+	// Apps directory is now in Wild Cloud Directory
+	appsDir := filepath.Join(directoryPath, "apps")
+
+	return &API{
+		dataDir:       dataDir,
+		directoryPath: directoryPath,
+		appsDir:       appsDir,
+		config:        config.NewManager(),
+		secrets:       secrets.NewManager(),
+		context:       context.NewManager(dataDir),
+		instance:      instance.NewManager(dataDir),
+		broadcaster:   operations.NewBroadcaster(),
+	}, nil
+}
+
+// RegisterRoutes registers all API routes (Phases 1-5)
+func (api *API) RegisterRoutes(r *mux.Router) {
+	// Phase 1: Instance management
+	r.HandleFunc("/api/v1/instances", api.CreateInstance).Methods("POST")
+	r.HandleFunc("/api/v1/instances", api.ListInstances).Methods("GET")
+	r.HandleFunc("/api/v1/instances/{name}", api.GetInstance).Methods("GET")
+	r.HandleFunc("/api/v1/instances/{name}", api.DeleteInstance).Methods("DELETE")
+
+	// Phase 1: Config management
+	r.HandleFunc("/api/v1/instances/{name}/config", api.GetConfig).Methods("GET")
+	r.HandleFunc("/api/v1/instances/{name}/config", api.UpdateConfig).Methods("PUT")
+	r.HandleFunc("/api/v1/instances/{name}/config", api.ConfigUpdateBatch).Methods("PATCH")
+
+	// Phase 1: Secrets management
+	r.HandleFunc("/api/v1/instances/{name}/secrets", api.GetSecrets).Methods("GET")
+	r.HandleFunc("/api/v1/instances/{name}/secrets",
api.UpdateSecrets).Methods("PUT") + + // Phase 1: Context management + r.HandleFunc("/api/v1/context", api.GetContext).Methods("GET") + r.HandleFunc("/api/v1/context", api.SetContext).Methods("POST") + + // Phase 2: Node management + r.HandleFunc("/api/v1/instances/{name}/nodes/discover", api.NodeDiscover).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/nodes/detect", api.NodeDetect).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/discovery", api.NodeDiscoveryStatus).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/nodes/hardware/{ip}", api.NodeHardware).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/nodes/fetch-templates", api.NodeFetchTemplates).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/nodes", api.NodeAdd).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/nodes", api.NodeList).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/nodes/{node}", api.NodeGet).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/nodes/{node}", api.NodeUpdate).Methods("PUT") + r.HandleFunc("/api/v1/instances/{name}/nodes/{node}/apply", api.NodeApply).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/nodes/{node}", api.NodeDelete).Methods("DELETE") + + // Phase 2: PXE asset management + r.HandleFunc("/api/v1/instances/{name}/pxe/assets", api.PXEListAssets).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/pxe/assets/download", api.PXEDownloadAsset).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/pxe/assets/{type}", api.PXEGetAsset).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/pxe/assets/{type}", api.PXEDeleteAsset).Methods("DELETE") + + // Phase 2: Operations + r.HandleFunc("/api/v1/instances/{name}/operations", api.OperationList).Methods("GET") + r.HandleFunc("/api/v1/operations/{id}", api.OperationGet).Methods("GET") + r.HandleFunc("/api/v1/operations/{id}/stream", api.OperationStream).Methods("GET") + r.HandleFunc("/api/v1/operations/{id}/cancel", api.OperationCancel).Methods("POST") + + // Phase 3: Cluster operations + r.HandleFunc("/api/v1/instances/{name}/cluster/config/generate", api.ClusterGenerateConfig).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/cluster/bootstrap", api.ClusterBootstrap).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/cluster/endpoints", api.ClusterConfigureEndpoints).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/cluster/status", api.ClusterGetStatus).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/cluster/health", api.ClusterHealth).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/cluster/kubeconfig", api.ClusterGetKubeconfig).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/cluster/kubeconfig/generate", api.ClusterGenerateKubeconfig).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/cluster/talosconfig", api.ClusterGetTalosconfig).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/cluster/reset", api.ClusterReset).Methods("POST") + + // Phase 4: Services + r.HandleFunc("/api/v1/instances/{name}/services", api.ServicesList).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/services", api.ServicesInstall).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/services/install-all", api.ServicesInstallAll).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/services/{service}", api.ServicesGet).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/services/{service}", api.ServicesDelete).Methods("DELETE") + r.HandleFunc("/api/v1/instances/{name}/services/{service}/status", api.ServicesGetStatus).Methods("GET") 
+ r.HandleFunc("/api/v1/services/{service}/manifest", api.ServicesGetManifest).Methods("GET") + r.HandleFunc("/api/v1/services/{service}/config", api.ServicesGetConfig).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/services/{service}/config", api.ServicesGetInstanceConfig).Methods("GET") + + // Service lifecycle endpoints + r.HandleFunc("/api/v1/instances/{name}/services/{service}/fetch", api.ServicesFetch).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/services/{service}/compile", api.ServicesCompile).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/services/{service}/deploy", api.ServicesDeploy).Methods("POST") + + // Phase 4: Apps + r.HandleFunc("/api/v1/apps", api.AppsListAvailable).Methods("GET") + r.HandleFunc("/api/v1/apps/{app}", api.AppsGetAvailable).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/apps", api.AppsListDeployed).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/apps", api.AppsAdd).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/apps/{app}/deploy", api.AppsDeploy).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/apps/{app}", api.AppsDelete).Methods("DELETE") + r.HandleFunc("/api/v1/instances/{name}/apps/{app}/status", api.AppsGetStatus).Methods("GET") + + // Phase 5: Backup & Restore + r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup", api.BackupAppStart).Methods("POST") + r.HandleFunc("/api/v1/instances/{name}/apps/{app}/backup", api.BackupAppList).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/apps/{app}/restore", api.BackupAppRestore).Methods("POST") + + // Phase 5: Utilities + r.HandleFunc("/api/v1/utilities/health", api.UtilitiesHealth).Methods("GET") + r.HandleFunc("/api/v1/instances/{name}/utilities/health", api.InstanceUtilitiesHealth).Methods("GET") + r.HandleFunc("/api/v1/utilities/dashboard/token", api.UtilitiesDashboardToken).Methods("GET") + r.HandleFunc("/api/v1/utilities/nodes/ips", api.UtilitiesNodeIPs).Methods("GET") + r.HandleFunc("/api/v1/utilities/controlplane/ip", api.UtilitiesControlPlaneIP).Methods("GET") + r.HandleFunc("/api/v1/utilities/secrets/{secret}/copy", api.UtilitiesSecretCopy).Methods("POST") + r.HandleFunc("/api/v1/utilities/version", api.UtilitiesVersion).Methods("GET") +} + +// CreateInstance creates a new instance +func (api *API) CreateInstance(w http.ResponseWriter, r *http.Request) { + var req struct { + Name string `json:"name"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + if req.Name == "" { + respondError(w, http.StatusBadRequest, "Instance name is required") + return + } + + if err := api.instance.CreateInstance(req.Name); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to create instance: %v", err)) + return + } + + respondJSON(w, http.StatusCreated, map[string]string{ + "name": req.Name, + "message": "Instance created successfully", + }) +} + +// ListInstances lists all instances +func (api *API) ListInstances(w http.ResponseWriter, r *http.Request) { + instances, err := api.instance.ListInstances() + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to list instances: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "instances": instances, + }) +} + +// GetInstance retrieves instance details +func (api *API) GetInstance(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + name := vars["name"] + + if err := 
api.instance.ValidateInstance(name); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Get config + configPath := api.instance.GetInstanceConfigPath(name) + configData, err := os.ReadFile(configPath) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to read config: %v", err)) + return + } + + var configMap map[string]interface{} + if err := yaml.Unmarshal(configData, &configMap); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to parse config: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "name": name, + "config": configMap, + }) +} + +// DeleteInstance deletes an instance +func (api *API) DeleteInstance(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + name := vars["name"] + + if err := api.instance.DeleteInstance(name); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to delete instance: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Instance deleted successfully", + }) +} + +// GetConfig retrieves instance configuration +func (api *API) GetConfig(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + name := vars["name"] + + if err := api.instance.ValidateInstance(name); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + configPath := api.instance.GetInstanceConfigPath(name) + configData, err := os.ReadFile(configPath) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to read config: %v", err)) + return + } + + var configMap map[string]interface{} + if err := yaml.Unmarshal(configData, &configMap); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to parse config: %v", err)) + return + } + + respondJSON(w, http.StatusOK, configMap) +} + +// UpdateConfig updates instance configuration +func (api *API) UpdateConfig(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + name := vars["name"] + + if err := api.instance.ValidateInstance(name); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + body, err := io.ReadAll(r.Body) + if err != nil { + respondError(w, http.StatusBadRequest, "Failed to read request body") + return + } + + var updates map[string]interface{} + if err := yaml.Unmarshal(body, &updates); err != nil { + respondError(w, http.StatusBadRequest, fmt.Sprintf("Invalid YAML: %v", err)) + return + } + + configPath := api.instance.GetInstanceConfigPath(name) + + // Update each key-value pair + for key, value := range updates { + valueStr := fmt.Sprintf("%v", value) + if err := api.config.SetConfigValue(configPath, key, valueStr); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to update config key %s: %v", key, err)) + return + } + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Config updated successfully", + }) +} + +// GetSecrets retrieves instance secrets (redacted by default) +func (api *API) GetSecrets(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + name := vars["name"] + + if err := api.instance.ValidateInstance(name); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + secretsPath := api.instance.GetInstanceSecretsPath(name) + + secretsData, err := 
os.ReadFile(secretsPath) + if err != nil { + if os.IsNotExist(err) { + respondJSON(w, http.StatusOK, map[string]interface{}{}) + return + } + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to read secrets: %v", err)) + return + } + + var secretsMap map[string]interface{} + if err := yaml.Unmarshal(secretsData, &secretsMap); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to parse secrets: %v", err)) + return + } + + // Check if client wants raw secrets (dangerous!) + showRaw := r.URL.Query().Get("raw") == "true" + + if !showRaw { + // Redact secrets + for key := range secretsMap { + secretsMap[key] = "********" + } + } + + respondJSON(w, http.StatusOK, secretsMap) +} + +// UpdateSecrets updates instance secrets +func (api *API) UpdateSecrets(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + name := vars["name"] + + if err := api.instance.ValidateInstance(name); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + body, err := io.ReadAll(r.Body) + if err != nil { + respondError(w, http.StatusBadRequest, "Failed to read request body") + return + } + + var updates map[string]interface{} + if err := yaml.Unmarshal(body, &updates); err != nil { + respondError(w, http.StatusBadRequest, fmt.Sprintf("Invalid YAML: %v", err)) + return + } + + // Get secrets file path + secretsPath := api.instance.GetInstanceSecretsPath(name) + + // Update each secret + for key, value := range updates { + valueStr := fmt.Sprintf("%v", value) + if err := api.secrets.SetSecret(secretsPath, key, valueStr); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to update secret %s: %v", key, err)) + return + } + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Secrets updated successfully", + }) +} + +// GetContext retrieves current context +func (api *API) GetContext(w http.ResponseWriter, r *http.Request) { + currentContext, err := api.context.GetCurrentContext() + if err != nil { + if os.IsNotExist(err) { + respondJSON(w, http.StatusOK, map[string]interface{}{ + "context": nil, + }) + return + } + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to get context: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "context": currentContext, + }) +} + +// SetContext sets current context +func (api *API) SetContext(w http.ResponseWriter, r *http.Request) { + var req struct { + Context string `json:"context"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + if req.Context == "" { + respondError(w, http.StatusBadRequest, "Context name is required") + return + } + + if err := api.context.SetCurrentContext(req.Context); err != nil { + respondError(w, http.StatusBadRequest, fmt.Sprintf("Failed to set context: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "context": req.Context, + "message": "Context set successfully", + }) +} + +// StatusHandler returns daemon status information +func (api *API) StatusHandler(w http.ResponseWriter, r *http.Request, startTime time.Time, dataDir, directoryPath string) { + // Get list of instances + instances, err := api.instance.ListInstances() + if err != nil { + instances = []string{} + } + + // Calculate uptime + uptime := time.Since(startTime) + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "status": "running", + 
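// The redaction rule GetSecrets applies above, restated as a standalone
// sketch: every value is masked unless the client passes ?raw=true.
package main

import "fmt"

func redact(secrets map[string]interface{}, showRaw bool) map[string]interface{} {
	if showRaw {
		return secrets
	}
	for key := range secrets {
		secrets[key] = "********"
	}
	return secrets
}

func main() {
	s := map[string]interface{}{"apiKey": "s3cret", "dbPassword": "hunter2"}
	fmt.Println(redact(s, false)) // map[apiKey:******** dbPassword:********]
}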
"version": "0.1.0", // TODO: Get from build info + "uptime": uptime.String(), + "uptimeSeconds": int(uptime.Seconds()), + "dataDir": dataDir, + "directoryPath": directoryPath, + "instances": map[string]interface{}{ + "count": len(instances), + "names": instances, + }, + }) +} + +// Helper functions + +func respondJSON(w http.ResponseWriter, status int, data interface{}) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + json.NewEncoder(w).Encode(data) +} + +func respondError(w http.ResponseWriter, status int, message string) { + respondJSON(w, status, map[string]string{ + "error": message, + }) +} diff --git a/internal/api/v1/handlers_apps.go b/internal/api/v1/handlers_apps.go new file mode 100644 index 0000000..3511b32 --- /dev/null +++ b/internal/api/v1/handlers_apps.go @@ -0,0 +1,206 @@ +package v1 + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/gorilla/mux" + + "github.com/wild-cloud/wild-central/daemon/internal/apps" + "github.com/wild-cloud/wild-central/daemon/internal/operations" +) + +// AppsListAvailable lists all available apps +func (api *API) AppsListAvailable(w http.ResponseWriter, r *http.Request) { + // List available apps from apps directory + appsMgr := apps.NewManager(api.dataDir, api.appsDir) + appList, err := appsMgr.ListAvailable() + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to list apps: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "apps": appList, + }) +} + +// AppsGetAvailable returns details for an available app +func (api *API) AppsGetAvailable(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + appName := vars["app"] + + // Get app details + appsMgr := apps.NewManager(api.dataDir, api.appsDir) + app, err := appsMgr.Get(appName) + if err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("App not found: %v", err)) + return + } + + respondJSON(w, http.StatusOK, app) +} + +// AppsListDeployed lists deployed apps for an instance +func (api *API) AppsListDeployed(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // List deployed apps + appsMgr := apps.NewManager(api.dataDir, api.appsDir) + deployedApps, err := appsMgr.ListDeployed(instanceName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to list apps: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "apps": deployedApps, + }) +} + +// AppsAdd adds an app to instance configuration +func (api *API) AppsAdd(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse request + var req struct { + Name string `json:"name"` + Config map[string]string `json:"config"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + if req.Name == "" { + respondError(w, http.StatusBadRequest, "app name is required") + return + } + + // Add app + appsMgr := apps.NewManager(api.dataDir, api.appsDir) + if err := 
appsMgr.Add(instanceName, req.Name, req.Config); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to add app: %v", err)) + return + } + + respondJSON(w, http.StatusCreated, map[string]string{ + "message": "App added to configuration", + "app": req.Name, + }) +} + +// AppsDeploy deploys an app to the cluster +func (api *API) AppsDeploy(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + appName := vars["app"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Start deploy operation + opsMgr := operations.NewManager(api.dataDir) + opID, err := opsMgr.Start(instanceName, "deploy_app", appName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to start operation: %v", err)) + return + } + + // Deploy in background + go func() { + appsMgr := apps.NewManager(api.dataDir, api.appsDir) + opsMgr.UpdateStatus(instanceName, opID, "running") + + if err := appsMgr.Deploy(instanceName, appName); err != nil { + opsMgr.Update(instanceName, opID, "failed", err.Error(), 0) + } else { + opsMgr.Update(instanceName, opID, "completed", "App deployed", 100) + } + }() + + respondJSON(w, http.StatusAccepted, map[string]string{ + "operation_id": opID, + "message": "App deployment initiated", + }) +} + +// AppsDelete deletes an app +func (api *API) AppsDelete(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + appName := vars["app"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Start delete operation + opsMgr := operations.NewManager(api.dataDir) + opID, err := opsMgr.Start(instanceName, "delete_app", appName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to start operation: %v", err)) + return + } + + // Delete in background + go func() { + appsMgr := apps.NewManager(api.dataDir, api.appsDir) + opsMgr.UpdateStatus(instanceName, opID, "running") + + if err := appsMgr.Delete(instanceName, appName); err != nil { + opsMgr.Update(instanceName, opID, "failed", err.Error(), 0) + } else { + opsMgr.Update(instanceName, opID, "completed", "App deleted", 100) + } + }() + + respondJSON(w, http.StatusAccepted, map[string]string{ + "operation_id": opID, + "message": "App deletion initiated", + }) +} + +// AppsGetStatus returns app status +func (api *API) AppsGetStatus(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + appName := vars["app"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Get status + appsMgr := apps.NewManager(api.dataDir, api.appsDir) + status, err := appsMgr.GetStatus(instanceName, appName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to get status: %v", err)) + return + } + + respondJSON(w, http.StatusOK, status) +} diff --git a/internal/api/v1/handlers_backup.go b/internal/api/v1/handlers_backup.go new file mode 100644 index 0000000..b1b9124 --- /dev/null +++ b/internal/api/v1/handlers_backup.go @@ -0,0 +1,110 @@ +package v1 + +import ( + "encoding/json" + 
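// Sketch of the async pattern AppsDeploy and AppsDelete follow above: the
// handler returns 202 with an operation_id and runs the work in a
// goroutine, so clients poll for completion. The operations route is not
// shown in this hunk; the path below is an assumption based on the
// handler reading an {id} path variable and an ?instance= query parameter.
package main

import (
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	opID, instance := "op-123", "my-cloud" // values returned by a prior deploy call
	url := fmt.Sprintf("http://localhost:5055/api/v1/operations/%s?instance=%s", opID, instance)
	resp, err := http.Get(url)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	var op map[string]interface{}
	if err := json.NewDecoder(resp.Body).Decode(&op); err != nil {
		panic(err)
	}
	fmt.Println("operation status:", op["status"]) // running, completed, or failed
}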
"net/http" + + "github.com/gorilla/mux" + "github.com/wild-cloud/wild-central/daemon/internal/backup" + "github.com/wild-cloud/wild-central/daemon/internal/operations" +) + +// BackupAppStart starts a backup operation for an app +func (api *API) BackupAppStart(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + appName := vars["app"] + + mgr := backup.NewManager(api.dataDir) + + // Create operation for tracking + opMgr := operations.NewManager(api.dataDir) + opID, err := opMgr.Start(instanceName, "backup", appName) + if err != nil { + respondError(w, http.StatusInternalServerError, "Failed to start backup operation") + return + } + + // Run backup in background + go func() { + opMgr.UpdateProgress(instanceName, opID, 10, "Starting backup") + + info, err := mgr.BackupApp(instanceName, appName) + if err != nil { + opMgr.Update(instanceName, opID, "failed", err.Error(), 100) + return + } + + opMgr.Update(instanceName, opID, "completed", "Backup completed", 100) + _ = info // Metadata saved in backup.json + }() + + respondJSON(w, http.StatusAccepted, map[string]interface{}{ + "success": true, + "operation_id": opID, + "message": "Backup started", + }) +} + +// BackupAppList lists all backups for an app +func (api *API) BackupAppList(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + appName := vars["app"] + + mgr := backup.NewManager(api.dataDir) + backups, err := mgr.ListBackups(instanceName, appName) + if err != nil { + respondError(w, http.StatusInternalServerError, "Failed to list backups") + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "success": true, + "data": map[string]interface{}{ + "backups": backups, + }, + }) +} + +// BackupAppRestore restores an app from backup +func (api *API) BackupAppRestore(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + appName := vars["app"] + + // Parse request body for restore options + var opts backup.RestoreOptions + if err := json.NewDecoder(r.Body).Decode(&opts); err != nil { + // Use defaults if no body provided + opts = backup.RestoreOptions{} + } + + mgr := backup.NewManager(api.dataDir) + + // Create operation for tracking + opMgr := operations.NewManager(api.dataDir) + opID, err := opMgr.Start(instanceName, "restore", appName) + if err != nil { + respondError(w, http.StatusInternalServerError, "Failed to start restore operation") + return + } + + // Run restore in background + go func() { + opMgr.UpdateProgress(instanceName, opID, 10, "Starting restore") + + if err := mgr.RestoreApp(instanceName, appName, opts); err != nil { + opMgr.Update(instanceName, opID, "failed", err.Error(), 100) + return + } + + opMgr.Update(instanceName, opID, "completed", "Restore completed", 100) + }() + + respondJSON(w, http.StatusAccepted, map[string]interface{}{ + "success": true, + "operation_id": opID, + "message": "Restore started", + }) +} diff --git a/internal/api/v1/handlers_cluster.go b/internal/api/v1/handlers_cluster.go new file mode 100644 index 0000000..5b4e1f3 --- /dev/null +++ b/internal/api/v1/handlers_cluster.go @@ -0,0 +1,331 @@ +package v1 + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/gorilla/mux" + + "github.com/wild-cloud/wild-central/daemon/internal/cluster" + "github.com/wild-cloud/wild-central/daemon/internal/operations" +) + +// ClusterGenerateConfig generates cluster configuration +func (api *API) ClusterGenerateConfig(w http.ResponseWriter, r *http.Request) 
{ + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Read cluster configuration from instance config + configPath := api.instance.GetInstanceConfigPath(instanceName) + + // Get cluster.name + clusterName, err := api.config.GetConfigValue(configPath, "cluster.name") + if err != nil || clusterName == "" { + respondError(w, http.StatusBadRequest, "cluster.name not set in config") + return + } + + // Get cluster.nodes.control.vip + vip, err := api.config.GetConfigValue(configPath, "cluster.nodes.control.vip") + if err != nil || vip == "" { + respondError(w, http.StatusBadRequest, "cluster.nodes.control.vip not set in config") + return + } + + // Get cluster.nodes.talos.version (optional, defaults to v1.11.0) + version, err := api.config.GetConfigValue(configPath, "cluster.nodes.talos.version") + if err != nil || version == "" { + version = "v1.11.0" + } + + // Create cluster config + config := cluster.ClusterConfig{ + ClusterName: clusterName, + VIP: vip, + Version: version, + } + + // Generate configuration + clusterMgr := cluster.NewManager(api.dataDir) + if err := clusterMgr.GenerateConfig(instanceName, &config); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to generate config: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Cluster configuration generated successfully", + }) +} + +// ClusterBootstrap bootstraps the cluster +func (api *API) ClusterBootstrap(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse request + var req struct { + Node string `json:"node"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + if req.Node == "" { + respondError(w, http.StatusBadRequest, "node is required") + return + } + + // Start bootstrap operation + opsMgr := operations.NewManager(api.dataDir) + opID, err := opsMgr.Start(instanceName, "bootstrap", req.Node) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to start operation: %v", err)) + return + } + + // Bootstrap in background + go func() { + clusterMgr := cluster.NewManager(api.dataDir) + opsMgr.UpdateStatus(instanceName, opID, "running") + + if err := clusterMgr.Bootstrap(instanceName, req.Node); err != nil { + opsMgr.Update(instanceName, opID, "failed", err.Error(), 0) + } else { + opsMgr.Update(instanceName, opID, "completed", "Bootstrap completed", 100) + } + }() + + respondJSON(w, http.StatusAccepted, map[string]string{ + "operation_id": opID, + "message": "Bootstrap initiated", + }) +} + +// ClusterConfigureEndpoints configures talosconfig endpoints to use VIP +func (api *API) ClusterConfigureEndpoints(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse request + var req struct { + IncludeNodes bool `json:"include_nodes"` + } 
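// Sketch of a ClusterBootstrap request: the handler above requires a JSON
// body naming the node to bootstrap. The endpoint path and node IP are
// assumptions for illustration.
package main

import (
	"bytes"
	"encoding/json"
	"fmt"
	"net/http"
)

func main() {
	payload, _ := json.Marshal(map[string]string{"node": "192.168.1.10"})
	resp, err := http.Post(
		"http://localhost:5055/api/v1/instances/my-cloud/cluster/bootstrap",
		"application/json",
		bytes.NewReader(payload),
	)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	fmt.Println("status:", resp.StatusCode) // 202 with an operation_id on success
}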
+ + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + // Default to false if no body provided + req.IncludeNodes = false + } + + // Configure endpoints + clusterMgr := cluster.NewManager(api.dataDir) + if err := clusterMgr.ConfigureEndpoints(instanceName, req.IncludeNodes); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to configure endpoints: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Endpoints configured successfully", + }) +} + +// ClusterGetStatus returns cluster status +func (api *API) ClusterGetStatus(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Get status + clusterMgr := cluster.NewManager(api.dataDir) + status, err := clusterMgr.GetStatus(instanceName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to get status: %v", err)) + return + } + + respondJSON(w, http.StatusOK, status) +} + +// ClusterHealth returns cluster health checks +func (api *API) ClusterHealth(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Get health checks + clusterMgr := cluster.NewManager(api.dataDir) + checks, err := clusterMgr.Health(instanceName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to get health: %v", err)) + return + } + + // Determine overall status + overallStatus := "healthy" + for _, check := range checks { + if check.Status == "failing" { + overallStatus = "unhealthy" + break + } else if check.Status == "warning" && overallStatus == "healthy" { + overallStatus = "degraded" + } + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "status": overallStatus, + "checks": checks, + }) +} + +// ClusterGetKubeconfig returns the kubeconfig +func (api *API) ClusterGetKubeconfig(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Get kubeconfig + clusterMgr := cluster.NewManager(api.dataDir) + kubeconfig, err := clusterMgr.GetKubeconfig(instanceName) + if err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Kubeconfig not found: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "kubeconfig": kubeconfig, + }) +} + +// ClusterGenerateKubeconfig regenerates the kubeconfig from the cluster +func (api *API) ClusterGenerateKubeconfig(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Regenerate kubeconfig from cluster + clusterMgr := cluster.NewManager(api.dataDir) + if err := clusterMgr.RegenerateKubeconfig(instanceName); err != nil { + respondError(w, http.StatusInternalServerError, 
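// The overall-status rule ClusterHealth applies above, restated as a
// standalone function: any failing check yields "unhealthy", otherwise a
// warning downgrades "healthy" to "degraded". The check type here is a
// local stand-in for the cluster package's health-check struct.
package main

import "fmt"

type healthCheck struct{ Status string }

func overallStatus(checks []healthCheck) string {
	status := "healthy"
	for _, c := range checks {
		if c.Status == "failing" {
			return "unhealthy"
		}
		if c.Status == "warning" && status == "healthy" {
			status = "degraded"
		}
	}
	return status
}

func main() {
	fmt.Println(overallStatus([]healthCheck{{"passing"}, {"warning"}}))  // degraded
	fmt.Println(overallStatus([]healthCheck{{"warning"}, {"failing"}})) // unhealthy
}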
fmt.Sprintf("Failed to generate kubeconfig: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Kubeconfig regenerated successfully", + }) +} + +// ClusterGetTalosconfig returns the talosconfig +func (api *API) ClusterGetTalosconfig(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Get talosconfig + clusterMgr := cluster.NewManager(api.dataDir) + talosconfig, err := clusterMgr.GetTalosconfig(instanceName) + if err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Talosconfig not found: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "talosconfig": talosconfig, + }) +} + +// ClusterReset resets the cluster +func (api *API) ClusterReset(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse request + var req struct { + Confirm bool `json:"confirm"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + if !req.Confirm { + respondError(w, http.StatusBadRequest, "Must confirm cluster reset") + return + } + + // Start reset operation + opsMgr := operations.NewManager(api.dataDir) + opID, err := opsMgr.Start(instanceName, "reset", instanceName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to start operation: %v", err)) + return + } + + // Reset in background + go func() { + clusterMgr := cluster.NewManager(api.dataDir) + opsMgr.UpdateStatus(instanceName, opID, "running") + + if err := clusterMgr.Reset(instanceName, req.Confirm); err != nil { + opsMgr.Update(instanceName, opID, "failed", err.Error(), 0) + } else { + opsMgr.Update(instanceName, opID, "completed", "Cluster reset completed", 100) + } + }() + + respondJSON(w, http.StatusAccepted, map[string]string{ + "operation_id": opID, + "message": "Cluster reset initiated", + }) +} diff --git a/internal/api/v1/handlers_config.go b/internal/api/v1/handlers_config.go new file mode 100644 index 0000000..c4e9fb9 --- /dev/null +++ b/internal/api/v1/handlers_config.go @@ -0,0 +1,76 @@ +package v1 + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/gorilla/mux" +) + +// ConfigUpdate represents a single configuration update +type ConfigUpdate struct { + Path string `json:"path"` + Value interface{} `json:"value"` +} + +// ConfigUpdateBatchRequest represents a batch configuration update request +type ConfigUpdateBatchRequest struct { + Updates []ConfigUpdate `json:"updates"` +} + +// ConfigUpdateBatch updates multiple configuration values atomically +func (api *API) ConfigUpdateBatch(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + name := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(name); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse request body + var req ConfigUpdateBatchRequest + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request 
body") + return + } + + if len(req.Updates) == 0 { + respondError(w, http.StatusBadRequest, "updates array is required and cannot be empty") + return + } + + // Get config path + configPath := api.instance.GetInstanceConfigPath(name) + + // Validate all paths before applying changes + for i, update := range req.Updates { + if update.Path == "" { + respondError(w, http.StatusBadRequest, fmt.Sprintf("update[%d]: path is required", i)) + return + } + } + + // Apply all updates atomically + // The config manager's SetConfigValue already uses file locking, + // so each individual update is atomic. For true atomicity across + // all updates, we would need to implement transaction support. + // For now, we apply updates sequentially within the lock. + updateCount := 0 + for _, update := range req.Updates { + valueStr := fmt.Sprintf("%v", update.Value) + if err := api.config.SetConfigValue(configPath, update.Path, valueStr); err != nil { + respondError(w, http.StatusInternalServerError, + fmt.Sprintf("Failed to update config path %s: %v", update.Path, err)) + return + } + updateCount++ + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "message": "Configuration updated successfully", + "updated": updateCount, + }) +} diff --git a/internal/api/v1/handlers_node.go b/internal/api/v1/handlers_node.go new file mode 100644 index 0000000..1de49da --- /dev/null +++ b/internal/api/v1/handlers_node.go @@ -0,0 +1,317 @@ +package v1 + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/gorilla/mux" + + "github.com/wild-cloud/wild-central/daemon/internal/discovery" + "github.com/wild-cloud/wild-central/daemon/internal/node" +) + +// NodeDiscover initiates node discovery +func (api *API) NodeDiscover(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse request body + var req struct { + IPList []string `json:"ip_list"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + if len(req.IPList) == 0 { + respondError(w, http.StatusBadRequest, "ip_list is required") + return + } + + // Start discovery + discoveryMgr := discovery.NewManager(api.dataDir, instanceName) + if err := discoveryMgr.StartDiscovery(instanceName, req.IPList); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to start discovery: %v", err)) + return + } + + respondJSON(w, http.StatusAccepted, map[string]string{ + "message": "Discovery started", + "status": "running", + }) +} + +// NodeDiscoveryStatus returns discovery status +func (api *API) NodeDiscoveryStatus(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + discoveryMgr := discovery.NewManager(api.dataDir, instanceName) + status, err := discoveryMgr.GetDiscoveryStatus(instanceName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to get status: %v", err)) + return + } + + respondJSON(w, http.StatusOK, status) +} + +// NodeHardware returns hardware info for a specific node +func (api *API) NodeHardware(w 
http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + nodeIP := vars["ip"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Detect hardware + nodeMgr := node.NewManager(api.dataDir) + hwInfo, err := nodeMgr.DetectHardware(nodeIP) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to detect hardware: %v", err)) + return + } + + respondJSON(w, http.StatusOK, hwInfo) +} + +// NodeDetect detects hardware on a single node (POST with IP in body) +func (api *API) NodeDetect(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse request body + var req struct { + IP string `json:"ip"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + if req.IP == "" { + respondError(w, http.StatusBadRequest, "ip is required") + return + } + + // Detect hardware + nodeMgr := node.NewManager(api.dataDir) + hwInfo, err := nodeMgr.DetectHardware(req.IP) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to detect hardware: %v", err)) + return + } + + respondJSON(w, http.StatusOK, hwInfo) +} + +// NodeAdd registers a new node +func (api *API) NodeAdd(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse node data + var nodeData node.Node + if err := json.NewDecoder(r.Body).Decode(&nodeData); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + // Add node + nodeMgr := node.NewManager(api.dataDir) + if err := nodeMgr.Add(instanceName, &nodeData); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to add node: %v", err)) + return + } + + respondJSON(w, http.StatusCreated, map[string]interface{}{ + "message": "Node added successfully", + "node": nodeData, + }) +} + +// NodeList returns all nodes for an instance +func (api *API) NodeList(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // List nodes + nodeMgr := node.NewManager(api.dataDir) + nodes, err := nodeMgr.List(instanceName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to list nodes: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "nodes": nodes, + }) +} + +// NodeGet returns a specific node +func (api *API) NodeGet(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + nodeIdentifier := vars["node"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: 
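// Request bodies for the two discovery entry points above: NodeDiscover
// takes an explicit ip_list to probe, NodeDetect inspects a single ip.
// The IP values are illustrative assumptions.
package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	discover, _ := json.Marshal(map[string][]string{
		"ip_list": {"192.168.1.10", "192.168.1.11"},
	})
	detect, _ := json.Marshal(map[string]string{"ip": "192.168.1.10"})
	fmt.Println(string(discover)) // body for NodeDiscover
	fmt.Println(string(detect))   // body for NodeDetect
}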
%v", err)) + return + } + + // Get node + nodeMgr := node.NewManager(api.dataDir) + nodeData, err := nodeMgr.Get(instanceName, nodeIdentifier) + if err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Node not found: %v", err)) + return + } + + respondJSON(w, http.StatusOK, nodeData) +} + +// NodeApply generates configuration and applies it to node +func (api *API) NodeApply(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + nodeIdentifier := vars["node"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Apply always uses default options (no body needed) + opts := node.ApplyOptions{} + + // Apply node configuration + nodeMgr := node.NewManager(api.dataDir) + if err := nodeMgr.Apply(instanceName, nodeIdentifier, opts); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to apply node configuration: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Node configuration applied successfully", + "node": nodeIdentifier, + }) +} + +// NodeUpdate modifies existing node configuration +func (api *API) NodeUpdate(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + nodeIdentifier := vars["node"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse update data + var updates map[string]interface{} + if err := json.NewDecoder(r.Body).Decode(&updates); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + // Update node + nodeMgr := node.NewManager(api.dataDir) + if err := nodeMgr.Update(instanceName, nodeIdentifier, updates); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to update node: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Node updated successfully", + "node": nodeIdentifier, + }) +} + +// NodeFetchTemplates copies patch templates from directory to instance +func (api *API) NodeFetchTemplates(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Fetch templates + nodeMgr := node.NewManager(api.dataDir) + if err := nodeMgr.FetchTemplates(instanceName); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to fetch templates: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Templates fetched successfully", + }) +} + +// NodeDelete removes a node +func (api *API) NodeDelete(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + nodeIdentifier := vars["node"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Delete node + nodeMgr := node.NewManager(api.dataDir) + if err := nodeMgr.Delete(instanceName, nodeIdentifier); err != nil { + respondError(w, http.StatusInternalServerError, 
fmt.Sprintf("Failed to delete node: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Node deleted successfully", + }) +} diff --git a/internal/api/v1/handlers_operations.go b/internal/api/v1/handlers_operations.go new file mode 100644 index 0000000..9566c98 --- /dev/null +++ b/internal/api/v1/handlers_operations.go @@ -0,0 +1,156 @@ +package v1 + +import ( + "bufio" + "encoding/json" + "fmt" + "net/http" + "os" + "path/filepath" + + "github.com/gorilla/mux" + + "github.com/wild-cloud/wild-central/daemon/internal/operations" +) + +// OperationGet returns operation status +func (api *API) OperationGet(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + opID := vars["id"] + + // Extract instance name from query param or header + instanceName := r.URL.Query().Get("instance") + if instanceName == "" { + respondError(w, http.StatusBadRequest, "instance parameter is required") + return + } + + // Get operation + opsMgr := operations.NewManager(api.dataDir) + op, err := opsMgr.GetByInstance(instanceName, opID) + if err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Operation not found: %v", err)) + return + } + + respondJSON(w, http.StatusOK, op) +} + +// OperationList returns all operations for an instance +func (api *API) OperationList(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // List operations + opsMgr := operations.NewManager(api.dataDir) + ops, err := opsMgr.List(instanceName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to list operations: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "operations": ops, + }) +} + +// OperationCancel cancels an operation +func (api *API) OperationCancel(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + opID := vars["id"] + + // Extract instance name from query param + instanceName := r.URL.Query().Get("instance") + if instanceName == "" { + respondError(w, http.StatusBadRequest, "instance parameter is required") + return + } + + // Cancel operation + opsMgr := operations.NewManager(api.dataDir) + if err := opsMgr.Cancel(instanceName, opID); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to cancel operation: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Operation cancelled", + "id": opID, + }) +} + +// OperationStream streams operation output via Server-Sent Events (SSE) +func (api *API) OperationStream(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + opID := vars["id"] + + // Extract instance name from query param + instanceName := r.URL.Query().Get("instance") + if instanceName == "" { + respondError(w, http.StatusBadRequest, "instance parameter is required") + return + } + + // Set SSE headers + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + w.Header().Set("Access-Control-Allow-Origin", "*") + + flusher, ok := w.(http.Flusher) + if !ok { + respondError(w, http.StatusInternalServerError, "Streaming not supported") + return + } + + // Check if operation is already completed + statusFile := filepath.Join(api.dataDir, "instances", instanceName, 
"operations", opID+".json") + isCompleted := false + if data, err := os.ReadFile(statusFile); err == nil { + var op map[string]interface{} + if err := json.Unmarshal(data, &op); err == nil { + if status, ok := op["status"].(string); ok { + isCompleted = (status == "completed" || status == "failed") + } + } + } + + // Send existing log file content first (if exists) + logPath := filepath.Join(api.dataDir, "instances", instanceName, "operations", opID, "output.log") + if _, err := os.Stat(logPath); err == nil { + file, err := os.Open(logPath) + if err == nil { + defer file.Close() + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + fmt.Fprintf(w, "data: %s\n\n", line) + flusher.Flush() + } + } + } + + // If operation is already completed, send completion signal and return + if isCompleted { + // Send an event to signal completion + fmt.Fprintf(w, "event: complete\ndata: Operation completed\n\n") + flusher.Flush() + return + } + + // Subscribe to new output for ongoing operations + ch := api.broadcaster.Subscribe(opID) + defer api.broadcaster.Unsubscribe(opID, ch) + + // Stream new output as it arrives + for data := range ch { + fmt.Fprintf(w, "data: %s\n\n", data) + flusher.Flush() + } +} diff --git a/internal/api/v1/handlers_pxe.go b/internal/api/v1/handlers_pxe.go new file mode 100644 index 0000000..51e7fd2 --- /dev/null +++ b/internal/api/v1/handlers_pxe.go @@ -0,0 +1,141 @@ +package v1 + +import ( + "encoding/json" + "fmt" + "net/http" + + "github.com/gorilla/mux" + + "github.com/wild-cloud/wild-central/daemon/internal/pxe" +) + +// PXEListAssets lists all PXE assets for an instance +func (api *API) PXEListAssets(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // List assets + pxeMgr := pxe.NewManager(api.dataDir) + assets, err := pxeMgr.ListAssets(instanceName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to list assets: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "assets": assets, + }) +} + +// PXEDownloadAsset downloads a PXE asset +func (api *API) PXEDownloadAsset(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse request + var req struct { + AssetType string `json:"asset_type"` // kernel, initramfs, iso + Version string `json:"version"` + URL string `json:"url"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + if req.AssetType == "" { + respondError(w, http.StatusBadRequest, "asset_type is required") + return + } + + if req.URL == "" { + respondError(w, http.StatusBadRequest, "url is required") + return + } + + // Download asset + pxeMgr := pxe.NewManager(api.dataDir) + if err := pxeMgr.DownloadAsset(instanceName, req.AssetType, req.Version, req.URL); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to download asset: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Asset downloaded successfully", + 
"asset_type": req.AssetType, + "version": req.Version, + }) +} + +// PXEGetAsset returns information about a specific asset +func (api *API) PXEGetAsset(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + assetType := vars["type"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Get asset path + pxeMgr := pxe.NewManager(api.dataDir) + assetPath, err := pxeMgr.GetAssetPath(instanceName, assetType) + if err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Asset not found: %v", err)) + return + } + + // Verify asset + valid, err := pxeMgr.VerifyAsset(instanceName, assetType) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to verify asset: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "type": assetType, + "path": assetPath, + "valid": valid, + }) +} + +// PXEDeleteAsset deletes a PXE asset +func (api *API) PXEDeleteAsset(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + assetType := vars["type"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Delete asset + pxeMgr := pxe.NewManager(api.dataDir) + if err := pxeMgr.DeleteAsset(instanceName, assetType); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to delete asset: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": "Asset deleted successfully", + "type": assetType, + }) +} diff --git a/internal/api/v1/handlers_services.go b/internal/api/v1/handlers_services.go new file mode 100644 index 0000000..2554aa5 --- /dev/null +++ b/internal/api/v1/handlers_services.go @@ -0,0 +1,424 @@ +package v1 + +import ( + "encoding/json" + "fmt" + "net/http" + "os" + "path/filepath" + "strings" + + "github.com/gorilla/mux" + "gopkg.in/yaml.v3" + + "github.com/wild-cloud/wild-central/daemon/internal/operations" + "github.com/wild-cloud/wild-central/daemon/internal/services" +) + +// ServicesList lists all base services +func (api *API) ServicesList(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // List services + servicesMgr := services.NewManager(api.dataDir, filepath.Join(api.directoryPath, "setup", "cluster-services")) + svcList, err := servicesMgr.List(instanceName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to list services: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "services": svcList, + }) +} + +// ServicesGet returns a specific service +func (api *API) ServicesGet(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + serviceName := vars["service"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Get service + servicesMgr := services.NewManager(api.dataDir, 
filepath.Join(api.directoryPath, "setup", "cluster-services")) + service, err := servicesMgr.Get(instanceName, serviceName) + if err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Service not found: %v", err)) + return + } + + respondJSON(w, http.StatusOK, service) +} + +// ServicesInstall installs a service +func (api *API) ServicesInstall(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse request + var req struct { + Name string `json:"name"` + Fetch bool `json:"fetch"` + Deploy bool `json:"deploy"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + if req.Name == "" { + respondError(w, http.StatusBadRequest, "service name is required") + return + } + + // Start install operation + opsMgr := operations.NewManager(api.dataDir) + opID, err := opsMgr.Start(instanceName, "install_service", req.Name) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to start operation: %v", err)) + return + } + + // Install in background + go func() { + // Recover from panics to prevent goroutine crashes + defer func() { + if r := recover(); r != nil { + fmt.Printf("[ERROR] Service install goroutine panic: %v\n", r) + opsMgr.Update(instanceName, opID, "failed", fmt.Sprintf("Internal error: %v", r), 0) + } + }() + + fmt.Printf("[DEBUG] Service install goroutine started: service=%s instance=%s opID=%s\n", req.Name, instanceName, opID) + servicesMgr := services.NewManager(api.dataDir, filepath.Join(api.directoryPath, "setup", "cluster-services")) + opsMgr.UpdateStatus(instanceName, opID, "running") + + if err := servicesMgr.Install(instanceName, req.Name, req.Fetch, req.Deploy, opID, api.broadcaster); err != nil { + fmt.Printf("[DEBUG] Service install failed: %v\n", err) + opsMgr.Update(instanceName, opID, "failed", err.Error(), 0) + } else { + fmt.Printf("[DEBUG] Service install completed successfully\n") + opsMgr.Update(instanceName, opID, "completed", "Service installed", 100) + } + }() + + respondJSON(w, http.StatusAccepted, map[string]string{ + "operation_id": opID, + "message": "Service installation initiated", + }) +} + +// ServicesInstallAll installs all base services +func (api *API) ServicesInstallAll(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Parse request + var req struct { + Fetch bool `json:"fetch"` + Deploy bool `json:"deploy"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + // Use defaults if no body + req.Deploy = true + } + + // Start install operation + opsMgr := operations.NewManager(api.dataDir) + opID, err := opsMgr.Start(instanceName, "install_all_services", "all") + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to start operation: %v", err)) + return + } + + // Install in background + go func() { + servicesMgr := services.NewManager(api.dataDir, filepath.Join(api.directoryPath, "setup", "cluster-services")) + opsMgr.UpdateStatus(instanceName, opID, "running") + + if err := 
servicesMgr.InstallAll(instanceName, req.Fetch, req.Deploy, opID, api.broadcaster); err != nil { + opsMgr.Update(instanceName, opID, "failed", err.Error(), 0) + } else { + opsMgr.Update(instanceName, opID, "completed", "All services installed", 100) + } + }() + + respondJSON(w, http.StatusAccepted, map[string]string{ + "operation_id": opID, + "message": "Services installation initiated", + }) +} + +// ServicesDelete deletes a service +func (api *API) ServicesDelete(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + serviceName := vars["service"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Start delete operation + opsMgr := operations.NewManager(api.dataDir) + opID, err := opsMgr.Start(instanceName, "delete_service", serviceName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to start operation: %v", err)) + return + } + + // Delete in background + go func() { + servicesMgr := services.NewManager(api.dataDir, filepath.Join(api.directoryPath, "setup", "cluster-services")) + opsMgr.UpdateStatus(instanceName, opID, "running") + + if err := servicesMgr.Delete(instanceName, serviceName); err != nil { + opsMgr.Update(instanceName, opID, "failed", err.Error(), 0) + } else { + opsMgr.Update(instanceName, opID, "completed", "Service deleted", 100) + } + }() + + respondJSON(w, http.StatusAccepted, map[string]string{ + "operation_id": opID, + "message": "Service deletion initiated", + }) +} + +// ServicesGetStatus returns detailed service status +func (api *API) ServicesGetStatus(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + serviceName := vars["service"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Get status + servicesMgr := services.NewManager(api.dataDir, filepath.Join(api.directoryPath, "setup", "cluster-services")) + status, err := servicesMgr.GetStatus(instanceName, serviceName) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to get status: %v", err)) + return + } + + respondJSON(w, http.StatusOK, status) +} + +// ServicesGetManifest returns the manifest for a service +func (api *API) ServicesGetManifest(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + serviceName := vars["service"] + + servicesMgr := services.NewManager(api.dataDir, filepath.Join(api.directoryPath, "setup", "cluster-services")) + manifest, err := servicesMgr.GetManifest(serviceName) + if err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Service not found: %v", err)) + return + } + + respondJSON(w, http.StatusOK, manifest) +} + +// ServicesGetConfig returns the service configuration schema +func (api *API) ServicesGetConfig(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + serviceName := vars["service"] + + servicesMgr := services.NewManager(api.dataDir, filepath.Join(api.directoryPath, "setup", "cluster-services")) + + // Get manifest + manifest, err := servicesMgr.GetManifest(serviceName) + if err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Service not found: %v", err)) + return + } + + // Return config schema + response := map[string]interface{}{ + "configReferences": 
manifest.ConfigReferences, + "serviceConfig": manifest.ServiceConfig, + } + + respondJSON(w, http.StatusOK, response) +} + +// ServicesGetInstanceConfig returns current config values for a service instance +func (api *API) ServicesGetInstanceConfig(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + serviceName := vars["service"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + servicesMgr := services.NewManager(api.dataDir, filepath.Join(api.directoryPath, "setup", "cluster-services")) + + // Get manifest to know which config paths to read + manifest, err := servicesMgr.GetManifest(serviceName) + if err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Service not found: %v", err)) + return + } + + // Load instance config as map for dynamic path extraction + configPath := filepath.Join(api.dataDir, "instances", instanceName, "config.yaml") + configData, err := os.ReadFile(configPath) + if err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to read instance config: %v", err)) + return + } + + var instanceConfig map[string]interface{} + if err := yaml.Unmarshal(configData, &instanceConfig); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to parse instance config: %v", err)) + return + } + + // Extract values for all config paths + configValues := make(map[string]interface{}) + + // Add config references + for _, path := range manifest.ConfigReferences { + if value := getNestedValue(instanceConfig, path); value != nil { + configValues[path] = value + } + } + + // Add service config + for _, cfg := range manifest.ServiceConfig { + if value := getNestedValue(instanceConfig, cfg.Path); value != nil { + configValues[cfg.Path] = value + } + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "config": configValues, + }) +} + +// getNestedValue retrieves a value from nested map using dot notation path +func getNestedValue(data map[string]interface{}, path string) interface{} { + keys := strings.Split(path, ".") + current := data + + for i, key := range keys { + if i == len(keys)-1 { + return current[key] + } + + if next, ok := current[key].(map[string]interface{}); ok { + current = next + } else { + return nil + } + } + + return nil +} + +// ServicesFetch handles fetching service files to instance +func (api *API) ServicesFetch(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + serviceName := vars["service"] + + // Validate instance exists + if !api.instance.InstanceExists(instanceName) { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance '%s' not found", instanceName)) + return + } + + // Fetch service files + servicesMgr := services.NewManager(api.dataDir, filepath.Join(api.directoryPath, "setup", "cluster-services")) + if err := servicesMgr.Fetch(instanceName, serviceName); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to fetch service: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": fmt.Sprintf("Service %s files fetched successfully", serviceName), + }) +} + +// ServicesCompile handles template compilation +func (api *API) ServicesCompile(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + serviceName := vars["service"] + + // Validate 
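// Standalone copy of getNestedValue (defined above) demonstrating the
// dot-path lookup semantics: nested maps are walked key by key, and a
// missing or non-map intermediate yields nil.
package main

import (
	"fmt"
	"strings"
)

func getNestedValue(data map[string]interface{}, path string) interface{} {
	keys := strings.Split(path, ".")
	current := data
	for i, key := range keys {
		if i == len(keys)-1 {
			return current[key]
		}
		if next, ok := current[key].(map[string]interface{}); ok {
			current = next
		} else {
			return nil
		}
	}
	return nil
}

func main() {
	cfg := map[string]interface{}{
		"cluster": map[string]interface{}{
			"nodes": map[string]interface{}{
				"control": map[string]interface{}{"vip": "192.168.1.100"},
			},
		},
	}
	fmt.Println(getNestedValue(cfg, "cluster.nodes.control.vip")) // 192.168.1.100
	fmt.Println(getNestedValue(cfg, "cluster.missing.vip"))       // <nil>
}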
instance exists + if !api.instance.InstanceExists(instanceName) { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance '%s' not found", instanceName)) + return + } + + // Compile templates + servicesMgr := services.NewManager(api.dataDir, filepath.Join(api.directoryPath, "setup", "cluster-services")) + if err := servicesMgr.Compile(instanceName, serviceName); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to compile templates: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": fmt.Sprintf("Templates compiled successfully for %s", serviceName), + }) +} + +// ServicesDeploy handles service deployment +func (api *API) ServicesDeploy(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + serviceName := vars["service"] + + // Validate instance exists + if !api.instance.InstanceExists(instanceName) { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance '%s' not found", instanceName)) + return + } + + // Deploy service (without operation tracking for standalone deploy) + servicesMgr := services.NewManager(api.dataDir, filepath.Join(api.directoryPath, "setup", "cluster-services")) + if err := servicesMgr.Deploy(instanceName, serviceName, "", nil); err != nil { + respondError(w, http.StatusInternalServerError, fmt.Sprintf("Failed to deploy service: %v", err)) + return + } + + respondJSON(w, http.StatusOK, map[string]string{ + "message": fmt.Sprintf("Service %s deployed successfully", serviceName), + }) +} diff --git a/internal/api/v1/handlers_utilities.go b/internal/api/v1/handlers_utilities.go new file mode 100644 index 0000000..879f9b1 --- /dev/null +++ b/internal/api/v1/handlers_utilities.go @@ -0,0 +1,151 @@ +package v1 + +import ( + "encoding/json" + "fmt" + "net/http" + "path/filepath" + + "github.com/gorilla/mux" + "github.com/wild-cloud/wild-central/daemon/internal/utilities" +) + +// UtilitiesHealth returns cluster health status (legacy, no instance context) +func (api *API) UtilitiesHealth(w http.ResponseWriter, r *http.Request) { + status, err := utilities.GetClusterHealth("") + if err != nil { + respondError(w, http.StatusInternalServerError, "Failed to get cluster health") + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "success": true, + "data": status, + }) +} + +// InstanceUtilitiesHealth returns cluster health status for a specific instance +func (api *API) InstanceUtilitiesHealth(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + instanceName := vars["name"] + + // Validate instance exists + if err := api.instance.ValidateInstance(instanceName); err != nil { + respondError(w, http.StatusNotFound, fmt.Sprintf("Instance not found: %v", err)) + return + } + + // Get kubeconfig path for this instance + kubeconfigPath := filepath.Join(api.dataDir, "instances", instanceName, "kubeconfig") + + status, err := utilities.GetClusterHealth(kubeconfigPath) + if err != nil { + respondError(w, http.StatusInternalServerError, "Failed to get cluster health") + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "success": true, + "data": status, + }) +} + +// UtilitiesDashboardToken returns a Kubernetes dashboard token +func (api *API) UtilitiesDashboardToken(w http.ResponseWriter, r *http.Request) { + token, err := utilities.GetDashboardToken() + if err != nil { + // Try fallback method + token, err = utilities.GetDashboardTokenFromSecret() + if err != nil { + respondError(w, http.StatusInternalServerError, 
"Failed to get dashboard token") + return + } + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "success": true, + "data": token, + }) +} + +// UtilitiesNodeIPs returns IP addresses for all cluster nodes +func (api *API) UtilitiesNodeIPs(w http.ResponseWriter, r *http.Request) { + nodes, err := utilities.GetNodeIPs() + if err != nil { + respondError(w, http.StatusInternalServerError, "Failed to get node IPs") + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "success": true, + "data": map[string]interface{}{ + "nodes": nodes, + }, + }) +} + +// UtilitiesControlPlaneIP returns the control plane IP +func (api *API) UtilitiesControlPlaneIP(w http.ResponseWriter, r *http.Request) { + ip, err := utilities.GetControlPlaneIP() + if err != nil { + respondError(w, http.StatusInternalServerError, "Failed to get control plane IP") + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "success": true, + "data": map[string]interface{}{ + "ip": ip, + }, + }) +} + +// UtilitiesSecretCopy copies a secret between namespaces +func (api *API) UtilitiesSecretCopy(w http.ResponseWriter, r *http.Request) { + vars := mux.Vars(r) + secretName := vars["secret"] + + var req struct { + SourceNamespace string `json:"source_namespace"` + DestinationNamespace string `json:"destination_namespace"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + respondError(w, http.StatusBadRequest, "Invalid request body") + return + } + + if req.SourceNamespace == "" || req.DestinationNamespace == "" { + respondError(w, http.StatusBadRequest, "source_namespace and destination_namespace are required") + return + } + + if err := utilities.CopySecretBetweenNamespaces(secretName, req.SourceNamespace, req.DestinationNamespace); err != nil { + respondError(w, http.StatusInternalServerError, "Failed to copy secret") + return + } + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "success": true, + "message": "Secret copied successfully", + }) +} + +// UtilitiesVersion returns cluster and Talos versions +func (api *API) UtilitiesVersion(w http.ResponseWriter, r *http.Request) { + k8sVersion, err := utilities.GetClusterVersion() + if err != nil { + respondError(w, http.StatusInternalServerError, "Failed to get cluster version") + return + } + + talosVersion, _ := utilities.GetTalosVersion() // Don't fail if Talos check fails + + respondJSON(w, http.StatusOK, map[string]interface{}{ + "success": true, + "data": map[string]interface{}{ + "kubernetes": k8sVersion, + "talos": talosVersion, + }, + }) +} diff --git a/internal/apps/apps.go b/internal/apps/apps.go new file mode 100644 index 0000000..3b7a6af --- /dev/null +++ b/internal/apps/apps.go @@ -0,0 +1,528 @@ +package apps + +import ( + "bytes" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "gopkg.in/yaml.v3" + + "github.com/wild-cloud/wild-central/daemon/internal/secrets" + "github.com/wild-cloud/wild-central/daemon/internal/storage" + "github.com/wild-cloud/wild-central/daemon/internal/tools" +) + +// Manager handles application lifecycle operations +type Manager struct { + dataDir string + appsDir string // Path to apps directory in repo +} + +// NewManager creates a new apps manager +func NewManager(dataDir, appsDir string) *Manager { + return &Manager{ + dataDir: dataDir, + appsDir: appsDir, + } +} + +// App represents an application +type App struct { + Name string `json:"name" yaml:"name"` + Description string `json:"description" yaml:"description"` + Version string `json:"version" 
yaml:"version"` + Category string `json:"category" yaml:"category"` + Dependencies []string `json:"dependencies" yaml:"dependencies"` + Config map[string]string `json:"config,omitempty" yaml:"config,omitempty"` +} + +// DeployedApp represents a deployed application instance +type DeployedApp struct { + Name string `json:"name"` + Status string `json:"status"` + Version string `json:"version"` + Namespace string `json:"namespace"` + URL string `json:"url,omitempty"` +} + +// ListAvailable lists all available apps from the apps directory +func (m *Manager) ListAvailable() ([]App, error) { + if m.appsDir == "" { + return []App{}, fmt.Errorf("apps directory not configured") + } + + // Read apps directory + entries, err := os.ReadDir(m.appsDir) + if err != nil { + return []App{}, fmt.Errorf("failed to read apps directory: %w", err) + } + + apps := []App{} + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + // Check for manifest.yaml + appFile := filepath.Join(m.appsDir, entry.Name(), "manifest.yaml") + if !storage.FileExists(appFile) { + continue + } + + // Parse manifest.yaml + data, err := os.ReadFile(appFile) + if err != nil { + continue + } + + var app App + if err := yaml.Unmarshal(data, &app); err != nil { + continue + } + + app.Name = entry.Name() // Use directory name as app name + apps = append(apps, app) + } + + return apps, nil +} + +// Get returns details for a specific available app +func (m *Manager) Get(appName string) (*App, error) { + appFile := filepath.Join(m.appsDir, appName, "manifest.yaml") + + if !storage.FileExists(appFile) { + return nil, fmt.Errorf("app %s not found", appName) + } + + data, err := os.ReadFile(appFile) + if err != nil { + return nil, fmt.Errorf("failed to read app file: %w", err) + } + + var app App + if err := yaml.Unmarshal(data, &app); err != nil { + return nil, fmt.Errorf("failed to parse app file: %w", err) + } + + app.Name = appName + return &app, nil +} + +// ListDeployed lists deployed apps for an instance +func (m *Manager) ListDeployed(instanceName string) ([]DeployedApp, error) { + kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) + instancePath := filepath.Join(m.dataDir, "instances", instanceName) + appsDir := filepath.Join(instancePath, "apps") + + apps := []DeployedApp{} + + // Check if apps directory exists + if !storage.FileExists(appsDir) { + return apps, nil + } + + // List all app directories + entries, err := os.ReadDir(appsDir) + if err != nil { + return apps, fmt.Errorf("failed to read apps directory: %w", err) + } + + // For each app directory, check if it's deployed in the cluster + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + appName := entry.Name() + + // Check if namespace exists in cluster + checkCmd := exec.Command("kubectl", "get", "namespace", appName, "-o", "json") + tools.WithKubeconfig(checkCmd, kubeconfigPath) + output, err := checkCmd.CombinedOutput() + + if err != nil { + // Namespace doesn't exist - app not deployed + continue + } + + // Parse namespace status + var ns struct { + Status struct { + Phase string `json:"phase"` + } `json:"status"` + } + if err := yaml.Unmarshal(output, &ns); err == nil && ns.Status.Phase == "Active" { + // App is deployed - get more details + app := DeployedApp{ + Name: appName, + Namespace: appName, + Status: "deployed", + } + + // Try to get version from manifest + manifestPath := filepath.Join(appsDir, appName, "manifest.yaml") + if storage.FileExists(manifestPath) { + manifestData, _ := os.ReadFile(manifestPath) + 
var manifest struct { + Version string `yaml:"version"` + } + if yaml.Unmarshal(manifestData, &manifest) == nil { + app.Version = manifest.Version + } + } + + apps = append(apps, app) + } + } + + return apps, nil +} + +// Add adds an app to the instance configuration +func (m *Manager) Add(instanceName, appName string, config map[string]string) error { + // 1. Verify app exists + manifestPath := filepath.Join(m.appsDir, appName, "manifest.yaml") + if !storage.FileExists(manifestPath) { + return fmt.Errorf("app %s not found at %s", appName, manifestPath) + } + + instancePath := filepath.Join(m.dataDir, "instances", instanceName) + configFile := filepath.Join(instancePath, "config.yaml") + secretsFile := filepath.Join(instancePath, "secrets.yaml") + appDestDir := filepath.Join(instancePath, "apps", appName) + + // Check instance config exists + if !storage.FileExists(configFile) { + return fmt.Errorf("instance config not found: %s", instanceName) + } + + // 2. Process manifest with gomplate + tempManifest := filepath.Join(os.TempDir(), fmt.Sprintf("manifest-%s.yaml", appName)) + defer os.Remove(tempManifest) + + gomplate := tools.NewGomplate() + context := map[string]string{ + ".": configFile, + "secrets": secretsFile, + } + if err := gomplate.RenderWithContext(manifestPath, tempManifest, context); err != nil { + return fmt.Errorf("failed to process manifest: %w", err) + } + + // Parse processed manifest + manifestData, err := os.ReadFile(tempManifest) + if err != nil { + return fmt.Errorf("failed to read processed manifest: %w", err) + } + + var manifest struct { + DefaultConfig map[string]interface{} `yaml:"defaultConfig"` + RequiredSecrets []string `yaml:"requiredSecrets"` + } + if err := yaml.Unmarshal(manifestData, &manifest); err != nil { + return fmt.Errorf("failed to parse manifest: %w", err) + } + + // 3. Update configuration + yq := tools.NewYQ() + configLock := configFile + ".lock" + + if err := storage.WithLock(configLock, func() error { + // Ensure apps section exists + expr := fmt.Sprintf(".apps.%s = .apps.%s // {}", appName, appName) + if _, err := yq.Exec("-i", expr, configFile); err != nil { + return fmt.Errorf("failed to ensure apps section: %w", err) + } + + // Merge defaultConfig (preserves existing values) + if len(manifest.DefaultConfig) > 0 { + for key, value := range manifest.DefaultConfig { + keyPath := fmt.Sprintf(".apps.%s.%s", appName, key) + // Only set if not already present + existing, _ := yq.Get(configFile, keyPath) + if existing == "" || existing == "null" { + if err := yq.Set(configFile, keyPath, fmt.Sprintf("%v", value)); err != nil { + return fmt.Errorf("failed to set config %s: %w", key, err) + } + } + } + } + + // Apply user-provided config overrides + for key, value := range config { + keyPath := fmt.Sprintf(".apps.%s.%s", appName, key) + if err := yq.Set(configFile, keyPath, value); err != nil { + return fmt.Errorf("failed to set config %s: %w", key, err) + } + } + + return nil + }); err != nil { + return err + } + + // 4. Generate required secrets + secretsMgr := secrets.NewManager() + for _, secretKey := range manifest.RequiredSecrets { + if _, err := secretsMgr.EnsureSecret(secretsFile, secretKey, secrets.DefaultSecretLength); err != nil { + return fmt.Errorf("failed to ensure secret %s: %w", secretKey, err) + } + } + + // 5. 
Copy and compile app files + if err := storage.EnsureDir(appDestDir, 0755); err != nil { + return fmt.Errorf("failed to create app directory: %w", err) + } + + // Copy source app directory + sourceAppDir := filepath.Join(m.appsDir, appName) + entries, err := os.ReadDir(sourceAppDir) + if err != nil { + return fmt.Errorf("failed to read app directory: %w", err) + } + + for _, entry := range entries { + if entry.IsDir() { + // TODO: Handle subdirectories if needed + continue + } + + sourcePath := filepath.Join(sourceAppDir, entry.Name()) + destPath := filepath.Join(appDestDir, entry.Name()) + + // Process with gomplate + if err := gomplate.RenderWithContext(sourcePath, destPath, context); err != nil { + return fmt.Errorf("failed to compile %s: %w", entry.Name(), err) + } + } + + return nil +} + +// Deploy deploys an app to the cluster +func (m *Manager) Deploy(instanceName, appName string) error { + kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) + instancePath := filepath.Join(m.dataDir, "instances", instanceName) + secretsFile := filepath.Join(instancePath, "secrets.yaml") + + // Get compiled app manifests from instance directory + appDir := filepath.Join(instancePath, "apps", appName) + if !storage.FileExists(appDir) { + return fmt.Errorf("app %s not found in instance (run 'wild app add %s' first)", appName, appName) + } + + // Create namespace if it doesn't exist + namespaceCmd := exec.Command("kubectl", "create", "namespace", appName, "--dry-run=client", "-o", "yaml") + tools.WithKubeconfig(namespaceCmd, kubeconfigPath) + namespaceYaml, _ := namespaceCmd.CombinedOutput() + + applyNsCmd := exec.Command("kubectl", "apply", "-f", "-") + applyNsCmd.Stdin = bytes.NewReader(namespaceYaml) + tools.WithKubeconfig(applyNsCmd, kubeconfigPath) + applyNsCmd.CombinedOutput() // Ignore errors - namespace might already exist + + // Create Kubernetes secrets from secrets.yaml + if storage.FileExists(secretsFile) { + yq := tools.NewYQ() + appSecretsPath := fmt.Sprintf(".apps.%s", appName) + appSecretsJson, err := yq.Get(secretsFile, fmt.Sprintf("%s | @json", appSecretsPath)) + if err == nil && appSecretsJson != "" && appSecretsJson != "null" { + // Delete existing secret if it exists (to update it) + deleteCmd := exec.Command("kubectl", "delete", "secret", fmt.Sprintf("%s-secrets", appName), "-n", appName, "--ignore-not-found") + tools.WithKubeconfig(deleteCmd, kubeconfigPath) + deleteCmd.CombinedOutput() + + // Create secret from literals + createSecretCmd := exec.Command("kubectl", "create", "secret", "generic", fmt.Sprintf("%s-secrets", appName), "-n", appName) + + // Parse secrets and add as literals + var appSecrets map[string]string + if err := yaml.Unmarshal([]byte(appSecretsJson), &appSecrets); err == nil { + for key, value := range appSecrets { + secretKey := fmt.Sprintf("apps.%s.%s", appName, key) + createSecretCmd.Args = append(createSecretCmd.Args, fmt.Sprintf("--from-literal=%s=%s", secretKey, value)) + } + } + + tools.WithKubeconfig(createSecretCmd, kubeconfigPath) + if output, err := createSecretCmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to create secret: %w\nOutput: %s", err, string(output)) + } + } + } + + // Apply manifests with kubectl using kustomize + cmd := exec.Command("kubectl", "apply", "-k", appDir) + tools.WithKubeconfig(cmd, kubeconfigPath) + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to deploy app: %w\nOutput: %s", err, string(output)) + } + + return nil +} + +// Delete removes an app from the 
cluster and configuration +func (m *Manager) Delete(instanceName, appName string) error { + kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) + instancePath := filepath.Join(m.dataDir, "instances", instanceName) + configFile := filepath.Join(instancePath, "config.yaml") + secretsFile := filepath.Join(instancePath, "secrets.yaml") + + // Get compiled app manifests from instance directory + appDir := filepath.Join(instancePath, "apps", appName) + if !storage.FileExists(appDir) { + return fmt.Errorf("app %s not found in instance", appName) + } + + // Delete namespace (this deletes ALL resources including deployments, services, secrets, etc.) + deleteNsCmd := exec.Command("kubectl", "delete", "namespace", appName, "--ignore-not-found") + tools.WithKubeconfig(deleteNsCmd, kubeconfigPath) + output, err := deleteNsCmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to delete namespace: %w\nOutput: %s", err, string(output)) + } + + // Wait for namespace deletion to complete (timeout after 60s) + waitCmd := exec.Command("kubectl", "wait", "--for=delete", "namespace", appName, "--timeout=60s") + tools.WithKubeconfig(waitCmd, kubeconfigPath) + waitCmd.CombinedOutput() // Ignore errors - namespace might not exist + + // Delete local app configuration directory + if err := os.RemoveAll(appDir); err != nil { + return fmt.Errorf("failed to delete local app directory: %w", err) + } + + // Remove app config from config.yaml + yq := tools.NewYQ() + configLock := configFile + ".lock" + if storage.FileExists(configFile) { + if err := storage.WithLock(configLock, func() error { + return yq.Delete(configFile, fmt.Sprintf(".apps.%s", appName)) + }); err != nil { + return fmt.Errorf("failed to remove app config: %w", err) + } + } + + // Remove app secrets from secrets.yaml + secretsMgr := secrets.NewManager() + if storage.FileExists(secretsFile) { + if err := secretsMgr.DeleteSecret(secretsFile, fmt.Sprintf("apps.%s", appName)); err != nil { + // Don't fail if secret doesn't exist + if !strings.Contains(err.Error(), "not found") { + return fmt.Errorf("failed to remove app secrets: %w", err) + } + } + } + + return nil +} + +// GetStatus returns the status of a deployed app +func (m *Manager) GetStatus(instanceName, appName string) (*DeployedApp, error) { + kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) + instancePath := filepath.Join(m.dataDir, "instances", instanceName) + appDir := filepath.Join(instancePath, "apps", appName) + + app := &DeployedApp{ + Name: appName, + Status: "not-added", + Namespace: appName, + } + + // Check if app was added to instance + if !storage.FileExists(appDir) { + return app, nil + } + + app.Status = "not-deployed" + + // Get version from manifest + manifestPath := filepath.Join(appDir, "manifest.yaml") + if storage.FileExists(manifestPath) { + manifestData, _ := os.ReadFile(manifestPath) + var manifest struct { + Version string `yaml:"version"` + } + if yaml.Unmarshal(manifestData, &manifest) == nil { + app.Version = manifest.Version + } + } + + // Check if namespace exists + checkNsCmd := exec.Command("kubectl", "get", "namespace", appName, "-o", "json") + tools.WithKubeconfig(checkNsCmd, kubeconfigPath) + nsOutput, err := checkNsCmd.CombinedOutput() + if err != nil { + // Namespace doesn't exist - not deployed + return app, nil + } + + // Parse namespace to check if it's active + var ns struct { + Status struct { + Phase string `json:"phase"` + } `json:"status"` + } + if err := yaml.Unmarshal(nsOutput, &ns); err != nil || 
ns.Status.Phase != "Active" {
+		return app, nil
+	}
+
+	// Namespace exists - check pod status
+	podsCmd := exec.Command("kubectl", "get", "pods", "-n", appName, "-o", "json")
+	tools.WithKubeconfig(podsCmd, kubeconfigPath)
+	podsOutput, err := podsCmd.CombinedOutput()
+	if err != nil {
+		app.Status = "error"
+		return app, nil
+	}
+
+	// Parse pods. Note: yaml.v3 matches lowercased field names and explicit
+	// yaml tags only; it ignores json tags, so camelCase keys such as
+	// "containerStatuses" need yaml tags or they silently unmarshal to zero.
+	var podList struct {
+		Items []struct {
+			Status struct {
+				Phase             string `json:"phase" yaml:"phase"`
+				ContainerStatuses []struct {
+					Ready bool `json:"ready" yaml:"ready"`
+				} `json:"containerStatuses" yaml:"containerStatuses"`
+			} `json:"status" yaml:"status"`
+		} `json:"items" yaml:"items"`
+	}
+	if err := yaml.Unmarshal(podsOutput, &podList); err != nil {
+		app.Status = "error"
+		return app, nil
+	}
+
+	if len(podList.Items) == 0 {
+		app.Status = "no-pods"
+		return app, nil
+	}
+
+	// Check pod status
+	allRunning := true
+	allReady := true
+	for _, pod := range podList.Items {
+		if pod.Status.Phase != "Running" {
+			allRunning = false
+		}
+		for _, cs := range pod.Status.ContainerStatuses {
+			if !cs.Ready {
+				allReady = false
+			}
+		}
+	}
+
+	if allRunning && allReady {
+		app.Status = "running"
+	} else if allRunning {
+		app.Status = "starting"
+	} else {
+		app.Status = "unhealthy"
+	}
+
+	return app, nil
+}
diff --git a/internal/backup/backup.go b/internal/backup/backup.go
new file mode 100644
index 0000000..d16769c
--- /dev/null
+++ b/internal/backup/backup.go
@@ -0,0 +1,494 @@
+// Package backup provides backup and restore operations for apps
+package backup
+
+import (
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"time"
+
+	"github.com/wild-cloud/wild-central/daemon/internal/storage"
+	"github.com/wild-cloud/wild-central/daemon/internal/tools"
+)
+
+// BackupInfo represents metadata about a backup
+type BackupInfo struct {
+	AppName   string    `json:"app_name"`
+	Timestamp string    `json:"timestamp"`
+	Type      string    `json:"type"` // "full", "database", "pvc"
+	Size      int64     `json:"size,omitempty"`
+	Status    string    `json:"status"` // "completed", "failed", "in_progress"
+	Error     string    `json:"error,omitempty"`
+	Files     []string  `json:"files"`
+	CreatedAt time.Time `json:"created_at"`
+}
+
+// RestoreOptions configures restore behavior
+type RestoreOptions struct {
+	DBOnly      bool   `json:"db_only"`
+	PVCOnly     bool   `json:"pvc_only"`
+	SkipGlobals bool   `json:"skip_globals"`
+	SnapshotID  string `json:"snapshot_id,omitempty"`
+}
+
+// Manager handles backup and restore operations
+type Manager struct {
+	dataDir string
+}
+
+// NewManager creates a new backup manager
+func NewManager(dataDir string) *Manager {
+	return &Manager{dataDir: dataDir}
+}
+
+// GetBackupDir returns the backup directory for an instance
+func (m *Manager) GetBackupDir(instanceName string) string {
+	return filepath.Join(m.dataDir, "instances", instanceName, "backups")
+}
+
+// GetStagingDir returns the staging directory for backups
+func (m *Manager) GetStagingDir(instanceName string) string {
+	return filepath.Join(m.GetBackupDir(instanceName), "staging")
+}
+
+// BackupApp creates a backup of an app's data
+func (m *Manager) BackupApp(instanceName, appName string) (*BackupInfo, error) {
+	kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName)
+
+	stagingDir := m.GetStagingDir(instanceName)
+	if err := storage.EnsureDir(stagingDir, 0755); err != nil {
+		return nil, fmt.Errorf("failed to create staging directory: %w", err)
+	}
+
+	backupDir := filepath.Join(stagingDir, "apps", appName)
+	if err := os.RemoveAll(backupDir); err != nil && !os.IsNotExist(err) {
+		return nil, fmt.Errorf("failed to clean backup directory: 
%w", err) + } + if err := storage.EnsureDir(backupDir, 0755); err != nil { + return nil, fmt.Errorf("failed to create backup directory: %w", err) + } + + timestamp := time.Now().UTC().Format("20060102T150405Z") + info := &BackupInfo{ + AppName: appName, + Timestamp: timestamp, + Type: "full", + Status: "in_progress", + Files: []string{}, + CreatedAt: time.Now(), + } + + // Backup database if app uses one + dbFiles, err := m.backupDatabase(kubeconfigPath, appName, backupDir, timestamp) + if err != nil { + info.Status = "failed" + info.Error = fmt.Sprintf("database backup failed: %v", err) + } else if len(dbFiles) > 0 { + info.Files = append(info.Files, dbFiles...) + } + + // Backup PVCs + pvcFiles, err := m.backupPVCs(kubeconfigPath, appName, backupDir) + if err != nil && info.Status != "failed" { + info.Status = "failed" + info.Error = fmt.Sprintf("pvc backup failed: %v", err) + } else if len(pvcFiles) > 0 { + info.Files = append(info.Files, pvcFiles...) + } + + if info.Status != "failed" { + info.Status = "completed" + } + + // Save backup metadata + metaFile := filepath.Join(backupDir, "backup.json") + if err := m.saveBackupMeta(metaFile, info); err != nil { + return nil, fmt.Errorf("failed to save backup metadata: %w", err) + } + + return info, nil +} + +// RestoreApp restores an app from backup +func (m *Manager) RestoreApp(instanceName, appName string, opts RestoreOptions) error { + kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) + + stagingDir := m.GetStagingDir(instanceName) + backupDir := filepath.Join(stagingDir, "apps", appName) + + // Check if backup exists + if !storage.FileExists(backupDir) { + return fmt.Errorf("no backup found for app %s", appName) + } + + // Restore database if not PVC-only + if !opts.PVCOnly { + if err := m.restoreDatabase(kubeconfigPath, appName, backupDir, opts.SkipGlobals); err != nil { + return fmt.Errorf("database restore failed: %w", err) + } + } + + // Restore PVCs if not DB-only + if !opts.DBOnly { + if err := m.restorePVCs(kubeconfigPath, appName, backupDir); err != nil { + return fmt.Errorf("pvc restore failed: %w", err) + } + } + + return nil +} + +// ListBackups returns all backups for an app +func (m *Manager) ListBackups(instanceName, appName string) ([]*BackupInfo, error) { + stagingDir := m.GetStagingDir(instanceName) + appBackupDir := filepath.Join(stagingDir, "apps", appName) + + if !storage.FileExists(appBackupDir) { + return []*BackupInfo{}, nil + } + + var backups []*BackupInfo + metaFile := filepath.Join(appBackupDir, "backup.json") + if storage.FileExists(metaFile) { + info, err := m.loadBackupMeta(metaFile) + if err == nil { + backups = append(backups, info) + } + } + + return backups, nil +} + +// backupDatabase backs up PostgreSQL or MySQL database +func (m *Manager) backupDatabase(kubeconfigPath, appName, backupDir, timestamp string) ([]string, error) { + // Detect database type from manifest or deployed pods + dbType, err := m.detectDatabaseType(kubeconfigPath, appName) + if err != nil || dbType == "" { + return nil, nil // No database to backup + } + + switch dbType { + case "postgres": + return m.backupPostgres(kubeconfigPath, appName, backupDir, timestamp) + case "mysql": + return m.backupMySQL(kubeconfigPath, appName, backupDir, timestamp) + default: + return nil, nil + } +} + +// backupPostgres backs up PostgreSQL database +func (m *Manager) backupPostgres(kubeconfigPath, appName, backupDir, timestamp string) ([]string, error) { + dbDump := filepath.Join(backupDir, fmt.Sprintf("database_%s.dump", 
timestamp))
+	globalsFile := filepath.Join(backupDir, fmt.Sprintf("globals_%s.sql", timestamp))
+
+	// Database dump
+	cmd := exec.Command("kubectl", "exec", "-n", "postgres", "deploy/postgres-deployment", "--",
+		"bash", "-lc", fmt.Sprintf("pg_dump -U postgres -Fc -Z 9 %s", appName))
+	tools.WithKubeconfig(cmd, kubeconfigPath)
+	output, err := cmd.Output()
+	if err != nil {
+		return nil, fmt.Errorf("pg_dump failed: %w", err)
+	}
+	if err := os.WriteFile(dbDump, output, 0600); err != nil {
+		return nil, fmt.Errorf("failed to write database dump: %w", err)
+	}
+
+	// Globals dump
+	cmd = exec.Command("kubectl", "exec", "-n", "postgres", "deploy/postgres-deployment", "--",
+		"bash", "-lc", "pg_dumpall -U postgres -g")
+	tools.WithKubeconfig(cmd, kubeconfigPath)
+	output, err = cmd.Output()
+	if err != nil {
+		return nil, fmt.Errorf("pg_dumpall failed: %w", err)
+	}
+	if err := os.WriteFile(globalsFile, output, 0600); err != nil {
+		return nil, fmt.Errorf("failed to write globals dump: %w", err)
+	}
+
+	return []string{dbDump, globalsFile}, nil
+}
+
+// backupMySQL backs up a MySQL database
+func (m *Manager) backupMySQL(kubeconfigPath, appName, backupDir, timestamp string) ([]string, error) {
+	dbDump := filepath.Join(backupDir, fmt.Sprintf("database_%s.sql", timestamp))
+
+	// Get MySQL password from secret
+	cmd := exec.Command("kubectl", "get", "secret", "-n", "mysql", "mysql-secret",
+		"-o", "jsonpath={.data.password}")
+	tools.WithKubeconfig(cmd, kubeconfigPath)
+	passOutput, err := cmd.Output()
+	if err != nil {
+		return nil, fmt.Errorf("failed to get MySQL password: %w", err)
+	}
+
+	// Secret data returned via jsonpath is base64-encoded; decode before use
+	passBytes, err := base64.StdEncoding.DecodeString(strings.TrimSpace(string(passOutput)))
+	if err != nil {
+		return nil, fmt.Errorf("failed to decode MySQL password: %w", err)
+	}
+	password := string(passBytes)
+
+	// MySQL dump
+	cmd = exec.Command("kubectl", "exec", "-n", "mysql", "deploy/mysql-deployment", "--",
+		"bash", "-c", fmt.Sprintf("mysqldump -uroot -p'%s' --single-transaction --routines --triggers %s",
+			password, appName))
+	tools.WithKubeconfig(cmd, kubeconfigPath)
+	output, err := cmd.Output()
+	if err != nil {
+		return nil, fmt.Errorf("mysqldump failed: %w", err)
+	}
+	if err := os.WriteFile(dbDump, output, 0600); err != nil {
+		return nil, fmt.Errorf("failed to write database dump: %w", err)
+	}
+
+	return []string{dbDump}, nil
+}
+
+// backupPVCs backs up all PVCs for an app
+func (m *Manager) backupPVCs(kubeconfigPath, appName, backupDir string) ([]string, error) {
+	// List PVCs for the app
+	cmd := exec.Command("kubectl", "get", "pvc", "-n", appName,
+		"-l", fmt.Sprintf("app=%s", appName),
+		"-o", "jsonpath={.items[*].metadata.name}")
+	tools.WithKubeconfig(cmd, kubeconfigPath)
+	output, err := cmd.Output()
+	if err != nil {
+		return nil, nil // No PVCs found
+	}
+
+	pvcs := strings.Fields(string(output))
+	if len(pvcs) == 0 {
+		return nil, nil
+	}
+
+	var files []string
+	for _, pvc := range pvcs {
+		pvcBackupDir := filepath.Join(backupDir, pvc)
+		if err := storage.EnsureDir(pvcBackupDir, 0755); err != nil {
+			return nil, fmt.Errorf("failed to create PVC backup dir: %w", err)
+		}
+
+		// Get a running pod
+		cmd = exec.Command("kubectl", "get", "pods", "-n", appName,
+			"-l", fmt.Sprintf("app=%s", appName),
+			"-o", "jsonpath={.items[?(@.status.phase==\"Running\")].metadata.name}")
+		tools.WithKubeconfig(cmd, kubeconfigPath)
+		podOutput, err := cmd.Output()
+		if err != nil || len(podOutput) == 0 {
+			continue
+		}
+		pod := strings.Fields(string(podOutput))[0]
+
+		// Back up PVC data via tar
+		cmd = exec.Command("kubectl", "exec", "-n", appName, pod, "--",
+			"tar", "-C", "/data", "-cf", "-", ".")
+		tools.WithKubeconfig(cmd, kubeconfigPath)
+		tarData, err := cmd.Output()
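+		// Note: cmd.Output() buffers the whole tar archive in memory, which is
+		// fine for small volumes. A streaming variant (a sketch only, not wired
+		// in here) would send the archive straight to disk instead:
+		//
+		//	f, err := os.Create(filepath.Join(pvcBackupDir, "data.tar"))
+		//	if err != nil { return nil, err }
+		//	defer f.Close()
+		//	cmd.Stdout = f
+		//	err = cmd.Run()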
+		if err != nil {
+			continue
+		}
+
+		// Write the tar archive to the backup directory
+		tarFile := filepath.Join(pvcBackupDir, "data.tar")
+		if err := os.WriteFile(tarFile, tarData, 0600); err != nil {
+			return nil, fmt.Errorf("failed to write PVC backup: %w", err)
+		}
+		files = append(files, tarFile)
+	}
+
+	return files, nil
+}
+
+// restoreDatabase restores a database from backup
+func (m *Manager) restoreDatabase(kubeconfigPath, appName, backupDir string, skipGlobals bool) error {
+	// Find database dump files
+	matches, err := filepath.Glob(filepath.Join(backupDir, "database_*.dump"))
+	if err != nil || len(matches) == 0 {
+		matches, _ = filepath.Glob(filepath.Join(backupDir, "database_*.sql"))
+	}
+	if len(matches) == 0 {
+		return nil // No database backup found
+	}
+
+	dumpFile := matches[0]
+	isPostgres := strings.HasSuffix(dumpFile, ".dump")
+
+	if isPostgres {
+		return m.restorePostgres(kubeconfigPath, appName, backupDir, skipGlobals)
+	}
+	return m.restoreMySQL(kubeconfigPath, appName, dumpFile)
+}
+
+// restorePostgres restores a PostgreSQL database.
+// Note: globals_*.sql dumps are not yet replayed here, so skipGlobals is
+// currently a no-op.
+func (m *Manager) restorePostgres(kubeconfigPath, appName, backupDir string, skipGlobals bool) error {
+	// Find dump files
+	dumps, _ := filepath.Glob(filepath.Join(backupDir, "database_*.dump"))
+	if len(dumps) == 0 {
+		return fmt.Errorf("no PostgreSQL dump found")
+	}
+
+	// Drop and recreate database
+	cmd := exec.Command("kubectl", "exec", "-n", "postgres", "deploy/postgres-deployment", "--",
+		"bash", "-lc", fmt.Sprintf("psql -U postgres -d postgres -c \"DROP DATABASE IF EXISTS %s; CREATE DATABASE %s OWNER %s;\"",
+			appName, appName, appName))
+	tools.WithKubeconfig(cmd, kubeconfigPath)
+	if _, err := cmd.CombinedOutput(); err != nil {
+		return fmt.Errorf("failed to recreate database: %w", err)
+	}
+
+	// Restore database
+	dumpData, err := os.ReadFile(dumps[0])
+	if err != nil {
+		return fmt.Errorf("failed to read dump file: %w", err)
+	}
+
+	cmd = exec.Command("kubectl", "exec", "-i", "-n", "postgres", "deploy/postgres-deployment", "--",
+		"bash", "-lc", fmt.Sprintf("pg_restore -U postgres -d %s", appName))
+	tools.WithKubeconfig(cmd, kubeconfigPath)
+	cmd.Stdin = strings.NewReader(string(dumpData))
+	if _, err := cmd.CombinedOutput(); err != nil {
+		return fmt.Errorf("pg_restore failed: %w", err)
+	}
+
+	return nil
+}
+
+// restoreMySQL restores a MySQL database
+func (m *Manager) restoreMySQL(kubeconfigPath, appName, dumpFile string) error {
+	// Get MySQL password
+	cmd := exec.Command("kubectl", "get", "secret", "-n", "mysql", "mysql-secret",
+		"-o", "jsonpath={.data.password}")
+	tools.WithKubeconfig(cmd, kubeconfigPath)
+	passOutput, err := cmd.Output()
+	if err != nil {
+		return fmt.Errorf("failed to get MySQL password: %w", err)
+	}
+
+	// Secret data returned via jsonpath is base64-encoded; decode before use
+	passBytes, err := base64.StdEncoding.DecodeString(strings.TrimSpace(string(passOutput)))
+	if err != nil {
+		return fmt.Errorf("failed to decode MySQL password: %w", err)
+	}
+	password := string(passBytes)
+
+	// Drop and recreate database
+	cmd = exec.Command("kubectl", "exec", "-n", "mysql", "deploy/mysql-deployment", "--",
+		"bash", "-c", fmt.Sprintf("mysql -uroot -p'%s' -e 'DROP DATABASE IF EXISTS %s; CREATE DATABASE %s;'",
+			password, appName, appName))
+	tools.WithKubeconfig(cmd, kubeconfigPath)
+	if _, err := cmd.CombinedOutput(); err != nil {
+		return fmt.Errorf("failed to recreate database: %w", err)
+	}
+
+	// Restore database
+	dumpData, err := os.ReadFile(dumpFile)
+	if err != nil {
+		return fmt.Errorf("failed to read dump file: %w", err)
+	}
+
+	cmd = exec.Command("kubectl", "exec", "-i", "-n", "mysql", "deploy/mysql-deployment", "--",
+		"bash", "-c", fmt.Sprintf("mysql -uroot -p'%s' %s", password, appName))
+	tools.WithKubeconfig(cmd, kubeconfigPath)
+	cmd.Stdin
= strings.NewReader(string(dumpData)) + if _, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("mysql restore failed: %w", err) + } + + return nil +} + +// restorePVCs restores PVC data from backup +func (m *Manager) restorePVCs(kubeconfigPath, appName, backupDir string) error { + // Find PVC backup directories + entries, err := os.ReadDir(backupDir) + if err != nil { + return fmt.Errorf("failed to read backup directory: %w", err) + } + + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + pvcName := entry.Name() + pvcBackupDir := filepath.Join(backupDir, pvcName) + tarFile := filepath.Join(pvcBackupDir, "data.tar") + + if !storage.FileExists(tarFile) { + continue + } + + // Scale app down + cmd := exec.Command("kubectl", "scale", "deployment", "-n", appName, + "-l", fmt.Sprintf("app=%s", appName), "--replicas=0") + tools.WithKubeconfig(cmd, kubeconfigPath) + if _, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to scale down app: %w", err) + } + + // Wait for pods to terminate + time.Sleep(10 * time.Second) + + // Create temp pod with PVC mounted + // (Simplified - in production would need proper node selection and resource specs) + tempPod := fmt.Sprintf("restore-util-%d", time.Now().Unix()) + + // Restore data via temp pod (simplified approach) + // Full implementation would create pod, wait for ready, copy data, clean up + + // Scale app back up + cmd = exec.Command("kubectl", "scale", "deployment", "-n", appName, + "-l", fmt.Sprintf("app=%s", appName), "--replicas=1") + tools.WithKubeconfig(cmd, kubeconfigPath) + if _, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to scale up app: %w", err) + } + + _ = tempPod // Placeholder for actual implementation + } + + return nil +} + +// detectDatabaseType detects the database type for an app +func (m *Manager) detectDatabaseType(kubeconfigPath, appName string) (string, error) { + // Check for postgres namespace + cmd := exec.Command("kubectl", "get", "namespace", "postgres") + tools.WithKubeconfig(cmd, kubeconfigPath) + if err := cmd.Run(); err == nil { + // Check if app uses postgres + cmd = exec.Command("kubectl", "get", "pods", "-n", "postgres", "-l", fmt.Sprintf("app=%s", appName)) + tools.WithKubeconfig(cmd, kubeconfigPath) + if output, _ := cmd.Output(); len(output) > 0 { + return "postgres", nil + } + } + + // Check for mysql namespace + cmd = exec.Command("kubectl", "get", "namespace", "mysql") + tools.WithKubeconfig(cmd, kubeconfigPath) + if err := cmd.Run(); err == nil { + cmd = exec.Command("kubectl", "get", "pods", "-n", "mysql", "-l", fmt.Sprintf("app=%s", appName)) + tools.WithKubeconfig(cmd, kubeconfigPath) + if output, _ := cmd.Output(); len(output) > 0 { + return "mysql", nil + } + } + + return "", nil +} + +// saveBackupMeta saves backup metadata to JSON file +func (m *Manager) saveBackupMeta(path string, info *BackupInfo) error { + data, err := json.MarshalIndent(info, "", " ") + if err != nil { + return err + } + return os.WriteFile(path, data, 0600) +} + +// loadBackupMeta loads backup metadata from JSON file +func (m *Manager) loadBackupMeta(path string) (*BackupInfo, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + var info BackupInfo + if err := json.Unmarshal(data, &info); err != nil { + return nil, err + } + return &info, nil +} diff --git a/internal/cluster/cluster.go b/internal/cluster/cluster.go new file mode 100644 index 0000000..7653d7a --- /dev/null +++ b/internal/cluster/cluster.go @@ -0,0 +1,518 
@@ +package cluster + +import ( + "encoding/json" + "fmt" + "log" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "github.com/wild-cloud/wild-central/daemon/internal/storage" + "github.com/wild-cloud/wild-central/daemon/internal/tools" +) + +// Manager handles cluster lifecycle operations +type Manager struct { + dataDir string + talosctl *tools.Talosctl +} + +// NewManager creates a new cluster manager +func NewManager(dataDir string) *Manager { + return &Manager{ + dataDir: dataDir, + talosctl: tools.NewTalosctl(), + } +} + +// ClusterConfig contains cluster configuration parameters +type ClusterConfig struct { + ClusterName string `json:"cluster_name"` + VIP string `json:"vip"` // Control plane virtual IP + Version string `json:"version"` +} + +// ClusterStatus represents cluster health and status +type ClusterStatus struct { + Status string `json:"status"` // ready, pending, error + Nodes int `json:"nodes"` + ControlPlaneNodes int `json:"control_plane_nodes"` + WorkerNodes int `json:"worker_nodes"` + KubernetesVersion string `json:"kubernetes_version"` + TalosVersion string `json:"talos_version"` + Services map[string]string `json:"services"` +} + +// GetTalosDir returns the talos directory for an instance +func (m *Manager) GetTalosDir(instanceName string) string { + return filepath.Join(m.dataDir, "instances", instanceName, "talos") +} + +// GetGeneratedDir returns the generated config directory +func (m *Manager) GetGeneratedDir(instanceName string) string { + return filepath.Join(m.GetTalosDir(instanceName), "generated") +} + +// GenerateConfig generates initial cluster configuration using talosctl gen config +func (m *Manager) GenerateConfig(instanceName string, config *ClusterConfig) error { + generatedDir := m.GetGeneratedDir(instanceName) + + // Check if already generated (idempotency) + secretsFile := filepath.Join(generatedDir, "secrets.yaml") + if storage.FileExists(secretsFile) { + // Already generated + return nil + } + + // Ensure generated directory exists + if err := storage.EnsureDir(generatedDir, 0755); err != nil { + return fmt.Errorf("failed to create generated directory: %w", err) + } + + // Generate secrets + cmd := exec.Command("talosctl", "gen", "secrets") + cmd.Dir = generatedDir + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to generate secrets: %w\nOutput: %s", err, string(output)) + } + + // Generate config with secrets + endpoint := fmt.Sprintf("https://%s:6443", config.VIP) + cmd = exec.Command("talosctl", "gen", "config", + "--with-secrets", "secrets.yaml", + config.ClusterName, + endpoint, + ) + cmd.Dir = generatedDir + output, err = cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to generate config: %w\nOutput: %s", err, string(output)) + } + + return nil +} + +// Bootstrap bootstraps the cluster on the specified node +func (m *Manager) Bootstrap(instanceName, nodeName string) error { + // Get node configuration to find the target IP + instancePath := filepath.Join(m.dataDir, "instances", instanceName) + configPath := filepath.Join(instancePath, "config.yaml") + + yq := tools.NewYQ() + + // Get node's target IP + nodeIPRaw, err := yq.Get(configPath, fmt.Sprintf(".cluster.nodes.active.%s.targetIp", nodeName)) + if err != nil { + return fmt.Errorf("failed to get node IP: %w", err) + } + + nodeIP := tools.CleanYQOutput(nodeIPRaw) + if nodeIP == "" || nodeIP == "null" { + return fmt.Errorf("node %s does not have a target IP configured", nodeName) + } + + // Get talosconfig path for this 
instance + talosconfigPath := tools.GetTalosconfigPath(m.dataDir, instanceName) + + // Set talosctl endpoint (with proper context via TALOSCONFIG env var) + cmdEndpoint := exec.Command("talosctl", "config", "endpoint", nodeIP) + tools.WithTalosconfig(cmdEndpoint, talosconfigPath) + if output, err := cmdEndpoint.CombinedOutput(); err != nil { + return fmt.Errorf("failed to set talosctl endpoint: %w\nOutput: %s", err, string(output)) + } + + // Bootstrap command (with proper context via TALOSCONFIG env var) + cmd := exec.Command("talosctl", "bootstrap", "--nodes", nodeIP) + tools.WithTalosconfig(cmd, talosconfigPath) + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to bootstrap cluster: %w\nOutput: %s", err, string(output)) + } + + // Retrieve kubeconfig after bootstrap (best-effort with retry) + log.Printf("Waiting for Kubernetes API server to become ready...") + if err := m.retrieveKubeconfigFromCluster(instanceName, nodeIP, 5*time.Minute); err != nil { + log.Printf("Warning: %v", err) + log.Printf("You can retrieve it manually later using: wild cluster kubeconfig --generate") + } + + return nil +} + +// retrieveKubeconfigFromCluster retrieves kubeconfig from the cluster with retry logic +func (m *Manager) retrieveKubeconfigFromCluster(instanceName, nodeIP string, timeout time.Duration) error { + kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) + talosconfigPath := tools.GetTalosconfigPath(m.dataDir, instanceName) + + // Retry logic: exponential backoff + delay := 5 * time.Second + maxDelay := 30 * time.Second + deadline := time.Now().Add(timeout) + + for time.Now().Before(deadline) { + // Try to retrieve kubeconfig + cmdKubeconfig := exec.Command("talosctl", "kubeconfig", "--nodes", nodeIP, kubeconfigPath) + tools.WithTalosconfig(cmdKubeconfig, talosconfigPath) + + if output, err := cmdKubeconfig.CombinedOutput(); err == nil { + log.Printf("Successfully retrieved kubeconfig for instance %s", instanceName) + return nil + } else { + // Check if we've exceeded deadline + if !time.Now().Before(deadline) { + return fmt.Errorf("failed to retrieve kubeconfig: %v\nOutput: %s", err, string(output)) + } + + // Wait before retrying + time.Sleep(delay) + + // Increase delay for next iteration (exponential backoff) + delay *= 2 + if delay > maxDelay { + delay = maxDelay + } + } + } + + return fmt.Errorf("failed to retrieve kubeconfig: timeout exceeded") +} + +// RegenerateKubeconfig regenerates the kubeconfig by retrieving it from the cluster +func (m *Manager) RegenerateKubeconfig(instanceName string) error { + instancePath := filepath.Join(m.dataDir, "instances", instanceName) + configPath := filepath.Join(instancePath, "config.yaml") + + yq := tools.NewYQ() + + // Get VIP from config + vipRaw, err := yq.Get(configPath, ".cluster.nodes.control.vip") + if err != nil { + return fmt.Errorf("failed to get VIP: %w", err) + } + + vip := tools.CleanYQOutput(vipRaw) + if vip == "" || vip == "null" { + return fmt.Errorf("control plane VIP not configured in cluster.nodes.control.vip") + } + + log.Printf("Regenerating kubeconfig for instance %s from cluster VIP %s", instanceName, vip) + // Use shorter timeout for manual regeneration (cluster should already be running) + return m.retrieveKubeconfigFromCluster(instanceName, vip, 30*time.Second) +} + +// ConfigureEndpoints updates talosconfig to use VIP and retrieves kubeconfig +func (m *Manager) ConfigureEndpoints(instanceName string, includeNodes bool) error { + instancePath := filepath.Join(m.dataDir, 
"instances", instanceName) + configPath := filepath.Join(instancePath, "config.yaml") + talosconfigPath := tools.GetTalosconfigPath(m.dataDir, instanceName) + + yq := tools.NewYQ() + + // Get VIP from config + vipRaw, err := yq.Get(configPath, ".cluster.nodes.control.vip") + if err != nil { + return fmt.Errorf("failed to get VIP: %w", err) + } + + vip := tools.CleanYQOutput(vipRaw) + if vip == "" || vip == "null" { + return fmt.Errorf("control plane VIP not configured in cluster.nodes.control.vip") + } + + // Build endpoints list + endpoints := []string{vip} + + // Add control node IPs if requested + if includeNodes { + nodesRaw, err := yq.Exec("eval", ".cluster.nodes.active | to_entries | .[] | select(.value.role == \"controlplane\") | .value.targetIp", configPath) + if err == nil { + nodeIPs := strings.Split(strings.TrimSpace(string(nodesRaw)), "\n") + for _, ip := range nodeIPs { + ip = tools.CleanYQOutput(ip) + if ip != "" && ip != "null" && ip != vip { + endpoints = append(endpoints, ip) + } + } + } + } + + // Update talosconfig endpoint to use VIP + args := append([]string{"config", "endpoint"}, endpoints...) + cmd := exec.Command("talosctl", args...) + tools.WithTalosconfig(cmd, talosconfigPath) + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to set talosctl endpoint: %w\nOutput: %s", err, string(output)) + } + + // Retrieve kubeconfig using the VIP + kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) + cmdKubeconfig := exec.Command("talosctl", "kubeconfig", "--nodes", vip, kubeconfigPath) + tools.WithTalosconfig(cmdKubeconfig, talosconfigPath) + if output, err := cmdKubeconfig.CombinedOutput(); err != nil { + return fmt.Errorf("failed to retrieve kubeconfig: %w\nOutput: %s", err, string(output)) + } + + return nil +} + +// GetStatus retrieves cluster status +func (m *Manager) GetStatus(instanceName string) (*ClusterStatus, error) { + status := &ClusterStatus{ + Status: "unknown", + Nodes: 0, + ControlPlaneNodes: 0, + WorkerNodes: 0, + Services: make(map[string]string), + } + + kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) + if !storage.FileExists(kubeconfigPath) { + status.Status = "not_bootstrapped" + return status, nil + } + + // Get node count and types using kubectl + cmd := exec.Command("kubectl", "--kubeconfig", kubeconfigPath, "get", "nodes", "-o", "json") + output, err := cmd.Output() + if err != nil { + status.Status = "unreachable" + return status, nil + } + + var nodesResult struct { + Items []struct { + Metadata struct { + Labels map[string]string `json:"labels"` + } `json:"metadata"` + Status struct { + Conditions []struct { + Type string `json:"type"` + Status string `json:"status"` + } `json:"conditions"` + NodeInfo struct { + KubeletVersion string `json:"kubeletVersion"` + } `json:"nodeInfo"` + } `json:"status"` + } `json:"items"` + } + + if err := json.Unmarshal(output, &nodesResult); err != nil { + return status, fmt.Errorf("failed to parse nodes: %w", err) + } + + status.Nodes = len(nodesResult.Items) + status.Status = "ready" + + // Get Kubernetes version from first node + if len(nodesResult.Items) > 0 { + status.KubernetesVersion = nodesResult.Items[0].Status.NodeInfo.KubeletVersion + } + + // Get Talos version using talosctl + talosconfigPath := tools.GetTalosconfigPath(m.dataDir, instanceName) + if storage.FileExists(talosconfigPath) { + cmd := exec.Command("talosctl", "version", "--short", "--client") + tools.WithTalosconfig(cmd, talosconfigPath) + output, err := cmd.Output() + if err == 
nil {
+			// Output format: "Talos v1.11.2"
+			line := strings.TrimSpace(string(output))
+			if strings.HasPrefix(line, "Talos") {
+				parts := strings.Fields(line)
+				if len(parts) >= 2 {
+					status.TalosVersion = parts[1]
+				}
+			}
+		}
+	}
+
+	// Count control plane and worker nodes
+	for _, node := range nodesResult.Items {
+		if _, isControl := node.Metadata.Labels["node-role.kubernetes.io/control-plane"]; isControl {
+			status.ControlPlaneNodes++
+		} else {
+			status.WorkerNodes++
+		}
+
+		// Check if node is ready
+		for _, cond := range node.Status.Conditions {
+			if cond.Type == "Ready" && cond.Status != "True" {
+				status.Status = "degraded"
+			}
+		}
+	}
+
+	// Check basic service status
+	services := []struct {
+		name      string
+		namespace string
+		selector  string
+	}{
+		{"metallb", "metallb-system", "app=metallb"},
+		{"traefik", "traefik", "app.kubernetes.io/name=traefik"},
+		{"cert-manager", "cert-manager", "app.kubernetes.io/instance=cert-manager"},
+		{"longhorn", "longhorn-system", "app=longhorn-manager"},
+	}
+
+	for _, svc := range services {
+		cmd := exec.Command("kubectl", "--kubeconfig", kubeconfigPath,
+			"get", "pods", "-n", svc.namespace, "-l", svc.selector,
+			"-o", "jsonpath={.items[*].status.phase}")
+		output, err := cmd.Output()
+		if err != nil || len(output) == 0 {
+			status.Services[svc.name] = "not_found"
+			continue
+		}
+
+		phases := strings.Fields(string(output))
+		allRunning := true
+		for _, phase := range phases {
+			if phase != "Running" {
+				allRunning = false
+				break
+			}
+		}
+
+		if allRunning && len(phases) > 0 {
+			status.Services[svc.name] = "running"
+		} else {
+			status.Services[svc.name] = "not_ready"
+			status.Status = "degraded"
+		}
+	}
+
+	return status, nil
+}
+
+// GetKubeconfig returns the kubeconfig for the cluster
+func (m *Manager) GetKubeconfig(instanceName string) (string, error) {
+	kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName)
+
+	if !storage.FileExists(kubeconfigPath) {
+		return "", fmt.Errorf("kubeconfig not found - cluster may not be bootstrapped")
+	}
+
+	data, err := os.ReadFile(kubeconfigPath)
+	if err != nil {
+		return "", fmt.Errorf("failed to read kubeconfig: %w", err)
+	}
+
+	return string(data), nil
+}
+
+// GetTalosconfig returns the talosconfig for the cluster
+func (m *Manager) GetTalosconfig(instanceName string) (string, error) {
+	talosconfigPath := filepath.Join(m.GetGeneratedDir(instanceName), "talosconfig")
+
+	if !storage.FileExists(talosconfigPath) {
+		return "", fmt.Errorf("talosconfig not found - cluster may not be initialized")
+	}
+
+	data, err := os.ReadFile(talosconfigPath)
+	if err != nil {
+		return "", fmt.Errorf("failed to read talosconfig: %w", err)
+	}
+
+	return string(data), nil
+}
+
+// Health checks cluster health
+func (m *Manager) Health(instanceName string) ([]HealthCheck, error) {
+	checks := []HealthCheck{}
+
+	// Check 1: Talos config exists
+	if _, err := m.GetTalosconfig(instanceName); err == nil {
+		checks = append(checks, HealthCheck{
+			Name:    "Talos Configuration",
+			Status:  "passing",
+			Message: "Talos configuration generated",
+		})
+	} else {
+		checks = append(checks, HealthCheck{
+			Name:    "Talos Configuration",
+			Status:  "warning",
+			Message: "Talos configuration not found",
+		})
+	}
+
+	// Check 2: Kubeconfig exists
+	if _, err := m.GetKubeconfig(instanceName); err == nil {
+		checks = append(checks, HealthCheck{
+			Name:    "Kubernetes Configuration",
+			Status:  "passing",
+			Message: "Kubeconfig available",
+		})
+	} else {
+		checks = append(checks, HealthCheck{
+			Name:    "Kubernetes Configuration",
+			Status:  "warning",
+			Message: "Kubeconfig not found",
+		})
+	}
+
+	// Additional health checks would query actual cluster state
+	// via kubectl and talosctl
+
+	return checks, nil
+}
+
+// HealthCheck represents a
single health check result +type HealthCheck struct { + Name string `json:"name"` + Status string `json:"status"` // passing, warning, failing + Message string `json:"message"` +} + +// Reset resets the cluster (dangerous operation) +func (m *Manager) Reset(instanceName string, confirm bool) error { + if !confirm { + return fmt.Errorf("reset requires confirmation") + } + + // This is a destructive operation + // Real implementation would: + // 1. Reset all nodes via talosctl reset + // 2. Remove generated configs + // 3. Clear node status in config.yaml + + generatedDir := m.GetGeneratedDir(instanceName) + if storage.FileExists(generatedDir) { + if err := os.RemoveAll(generatedDir); err != nil { + return fmt.Errorf("failed to remove generated configs: %w", err) + } + } + + return nil +} + +// ConfigureContext configures talosctl context for the cluster +func (m *Manager) ConfigureContext(instanceName, clusterName string) error { + talosconfigPath := filepath.Join(m.GetGeneratedDir(instanceName), "talosconfig") + + if !storage.FileExists(talosconfigPath) { + return fmt.Errorf("talosconfig not found") + } + + // Merge talosconfig into user's talosctl config + cmd := exec.Command("talosctl", "config", "merge", talosconfigPath) + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to merge talosconfig: %w\nOutput: %s", err, string(output)) + } + + // Set context + cmd = exec.Command("talosctl", "config", "context", clusterName) + output, err = cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to set context: %w\nOutput: %s", err, string(output)) + } + + return nil +} + +// HasContext checks if talosctl context exists +func (m *Manager) HasContext(clusterName string) (bool, error) { + cmd := exec.Command("talosctl", "config", "contexts") + output, err := cmd.CombinedOutput() + if err != nil { + return false, fmt.Errorf("failed to list contexts: %w", err) + } + + return strings.Contains(string(output), clusterName), nil +} diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..04d06cc --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,168 @@ +package config + +import ( + "fmt" + "os" + "path/filepath" + + "gopkg.in/yaml.v3" +) + +// GlobalConfig represents the main configuration structure +type GlobalConfig struct { + Wildcloud struct { + Repository string `yaml:"repository" json:"repository"` + CurrentPhase string `yaml:"currentPhase" json:"currentPhase"` + CompletedPhases []string `yaml:"completedPhases" json:"completedPhases"` + } `yaml:"wildcloud" json:"wildcloud"` + Server struct { + Port int `yaml:"port" json:"port"` + Host string `yaml:"host" json:"host"` + } `yaml:"server" json:"server"` + Operator struct { + Email string `yaml:"email" json:"email"` + } `yaml:"operator" json:"operator"` + Cloud struct { + DNS struct { + IP string `yaml:"ip" json:"ip"` + ExternalResolver string `yaml:"externalResolver" json:"externalResolver"` + } `yaml:"dns" json:"dns"` + Router struct { + IP string `yaml:"ip" json:"ip"` + DynamicDns string `yaml:"dynamicDns" json:"dynamicDns"` + } `yaml:"router" json:"router"` + Dnsmasq struct { + Interface string `yaml:"interface" json:"interface"` + } `yaml:"dnsmasq" json:"dnsmasq"` + } `yaml:"cloud" json:"cloud"` + Cluster struct { + EndpointIP string `yaml:"endpointIp" json:"endpointIp"` + Nodes struct { + Talos struct { + Version string `yaml:"version" json:"version"` + } `yaml:"talos" json:"talos"` + } `yaml:"nodes" json:"nodes"` + } `yaml:"cluster" json:"cluster"` 
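+	// An illustrative config.yaml matching this structure (the values below
+	// are placeholders, not defaults, except the server settings):
+	//
+	//	wildcloud:
+	//	  repository: /opt/wild-cloud
+	//	  currentPhase: setup
+	//	server:
+	//	  port: 5055
+	//	  host: 0.0.0.0
+	//	operator:
+	//	  email: admin@example.com
+	//	cloud:
+	//	  dns:
+	//	    ip: 192.168.8.50
+	//	cluster:
+	//	  endpointIp: 192.168.8.60
+	//	  nodes:
+	//	    talos:
+	//	      version: v1.11.2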
+}
+
+// LoadGlobalConfig loads configuration from the specified path
+func LoadGlobalConfig(configPath string) (*GlobalConfig, error) {
+	data, err := os.ReadFile(configPath)
+	if err != nil {
+		return nil, fmt.Errorf("reading config file %s: %w", configPath, err)
+	}
+
+	config := &GlobalConfig{}
+	if err := yaml.Unmarshal(data, config); err != nil {
+		return nil, fmt.Errorf("parsing config file: %w", err)
+	}
+
+	// Set defaults
+	if config.Server.Port == 0 {
+		config.Server.Port = 5055
+	}
+	if config.Server.Host == "" {
+		config.Server.Host = "0.0.0.0"
+	}
+
+	return config, nil
+}
+
+// SaveGlobalConfig saves the configuration to the specified path
+func SaveGlobalConfig(config *GlobalConfig, configPath string) error {
+	// Ensure the directory exists
+	if err := os.MkdirAll(filepath.Dir(configPath), 0755); err != nil {
+		return fmt.Errorf("creating config directory: %w", err)
+	}
+
+	data, err := yaml.Marshal(config)
+	if err != nil {
+		return fmt.Errorf("marshaling config: %w", err)
+	}
+
+	return os.WriteFile(configPath, data, 0644)
+}
+
+// IsEmpty checks if the configuration is empty or uninitialized
+func (c *GlobalConfig) IsEmpty() bool {
+	if c == nil {
+		return true
+	}
+
+	// Check if any essential fields are empty
+	return c.Cloud.DNS.IP == "" || c.Cluster.Nodes.Talos.Version == ""
+}
+
+type NodeConfig struct {
+	Role      string `yaml:"role" json:"role"`
+	Interface string `yaml:"interface" json:"interface"`
+	Disk      string `yaml:"disk" json:"disk"`
+	CurrentIp string `yaml:"currentIp" json:"currentIp"`
+}
+
+type InstanceConfig struct {
+	BaseDomain     string `yaml:"baseDomain" json:"baseDomain"`
+	Domain         string `yaml:"domain" json:"domain"`
+	InternalDomain string `yaml:"internalDomain" json:"internalDomain"`
+	Backup         struct {
+		Root string `yaml:"root" json:"root"`
+	} `yaml:"backup" json:"backup"`
+	DHCPRange string `yaml:"dhcpRange" json:"dhcpRange"`
+	NFS       struct {
+		Host      string `yaml:"host" json:"host"`
+		MediaPath string `yaml:"mediaPath" json:"mediaPath"`
+	} `yaml:"nfs" json:"nfs"`
+	Cluster struct {
+		Name           string `yaml:"name" json:"name"`
+		LoadBalancerIp string `yaml:"loadBalancerIp" json:"loadBalancerIp"`
+		IpAddressPool  string `yaml:"ipAddressPool" json:"ipAddressPool"`
+		CertManager    struct {
+			Cloudflare struct {
+				Domain string `yaml:"domain" json:"domain"`
+				ZoneID string `yaml:"zoneID" json:"zoneID"`
+			} `yaml:"cloudflare" json:"cloudflare"`
+		} `yaml:"certManager" json:"certManager"`
+		ExternalDns struct {
+			OwnerId string `yaml:"ownerId" json:"ownerId"`
+		} `yaml:"externalDns" json:"externalDns"`
+		HostnamePrefix string `yaml:"hostnamePrefix" json:"hostnamePrefix"`
+		Nodes          struct {
+			Talos struct {
+				Version     string `yaml:"version" json:"version"`
+				SchematicId string `yaml:"schematicId" json:"schematicId"`
+			} `yaml:"talos" json:"talos"`
+			Control struct {
+				Vip string `yaml:"vip" json:"vip"`
+			} `yaml:"control" json:"control"`
+			ActiveNodes []map[string]NodeConfig `yaml:"activeNodes" json:"activeNodes"`
+		} `yaml:"nodes" json:"nodes"`
+	} `yaml:"cluster" json:"cluster"`
+}
+
+func LoadCloudConfig(configPath string) (*InstanceConfig, error) {
+	data, err := os.ReadFile(configPath)
+	if err != nil {
+		return nil, fmt.Errorf("reading config file %s: %w", configPath, err)
+	}
+
+	config := &InstanceConfig{}
+	if err := yaml.Unmarshal(data, config); err != nil {
+		return nil, fmt.Errorf("parsing config file: %w", err)
+	}
+
+	return config, nil
+}
+
+func SaveCloudConfig(config *InstanceConfig, configPath string) error {
+	// Ensure the directory exists
+	if err :=
os.MkdirAll(filepath.Dir(configPath), 0755); err != nil { + return fmt.Errorf("creating config directory: %w", err) + } + + data, err := yaml.Marshal(config) + if err != nil { + return fmt.Errorf("marshaling config: %w", err) + } + + return os.WriteFile(configPath, data, 0644) +} diff --git a/internal/config/manager.go b/internal/config/manager.go new file mode 100644 index 0000000..6609afc --- /dev/null +++ b/internal/config/manager.go @@ -0,0 +1,167 @@ +package config + +import ( + "fmt" + "path/filepath" + + "github.com/wild-cloud/wild-central/daemon/internal/storage" + "github.com/wild-cloud/wild-central/daemon/internal/tools" +) + +// Manager handles configuration file operations with idempotency +type Manager struct { + yq *tools.YQ +} + +// NewManager creates a new config manager +func NewManager() *Manager { + return &Manager{ + yq: tools.NewYQ(), + } +} + +// EnsureInstanceConfig ensures an instance config file exists with proper structure +func (m *Manager) EnsureInstanceConfig(instancePath string) error { + configPath := filepath.Join(instancePath, "config.yaml") + + // Check if config already exists + if storage.FileExists(configPath) { + // Validate existing config + if err := m.yq.Validate(configPath); err != nil { + return fmt.Errorf("invalid config file: %w", err) + } + return nil + } + + // Create minimal config structure + initialConfig := `# Wild Cloud Instance Configuration +baseDomain: "" +domain: "" +internalDomain: "" +dhcpRange: "" +backup: + root: "" +nfs: + host: "" + mediaPath: "" +cluster: + name: "" + loadBalancerIp: "" + ipAddressPool: "" + hostnamePrefix: "" + certManager: + cloudflare: + domain: "" + zoneID: "" + externalDns: + ownerId: "" + nodes: + talos: + version: "" + schematicId: "" + control: + vip: "" + activeNodes: [] +` + + // Ensure instance directory exists + if err := storage.EnsureDir(instancePath, 0755); err != nil { + return err + } + + // Write config with proper permissions + if err := storage.WriteFile(configPath, []byte(initialConfig), 0644); err != nil { + return err + } + + return nil +} + +// GetConfigValue retrieves a value from a config file +func (m *Manager) GetConfigValue(configPath, key string) (string, error) { + if !storage.FileExists(configPath) { + return "", fmt.Errorf("config file not found: %s", configPath) + } + + value, err := m.yq.Get(configPath, fmt.Sprintf(".%s", key)) + if err != nil { + return "", fmt.Errorf("getting config value %s: %w", key, err) + } + + return value, nil +} + +// SetConfigValue sets a value in a config file +func (m *Manager) SetConfigValue(configPath, key, value string) error { + if !storage.FileExists(configPath) { + return fmt.Errorf("config file not found: %s", configPath) + } + + // Acquire lock before modifying + lockPath := configPath + ".lock" + return storage.WithLock(lockPath, func() error { + return m.yq.Set(configPath, fmt.Sprintf(".%s", key), value) + }) +} + +// EnsureConfigValue sets a value only if it's not already set (idempotent) +func (m *Manager) EnsureConfigValue(configPath, key, value string) error { + if !storage.FileExists(configPath) { + return fmt.Errorf("config file not found: %s", configPath) + } + + // Check if value already set + currentValue, err := m.GetConfigValue(configPath, key) + if err == nil && currentValue != "" && currentValue != "null" { + // Value already set, skip + return nil + } + + // Set the value + return m.SetConfigValue(configPath, key, value) +} + +// ValidateConfig validates a config file +func (m *Manager) ValidateConfig(configPath string) 
error { + if !storage.FileExists(configPath) { + return fmt.Errorf("config file not found: %s", configPath) + } + + return m.yq.Validate(configPath) +} + +// CopyConfig copies a config file to a new location +func (m *Manager) CopyConfig(srcPath, dstPath string) error { + if !storage.FileExists(srcPath) { + return fmt.Errorf("source config file not found: %s", srcPath) + } + + // Read source + content, err := storage.ReadFile(srcPath) + if err != nil { + return err + } + + // Ensure destination directory exists + if err := storage.EnsureDir(filepath.Dir(dstPath), 0755); err != nil { + return err + } + + // Write destination + return storage.WriteFile(dstPath, content, 0644) +} + +// GetInstanceConfigPath returns the path to an instance's config file +func GetInstanceConfigPath(dataDir, instanceName string) string { + return filepath.Join(dataDir, "instances", instanceName, "config.yaml") +} + +// GetInstanceSecretsPath returns the path to an instance's secrets file +func GetInstanceSecretsPath(dataDir, instanceName string) string { + return filepath.Join(dataDir, "instances", instanceName, "secrets.yaml") +} + +// GetInstancePath returns the path to an instance directory +func GetInstancePath(dataDir, instanceName string) string { + return filepath.Join(dataDir, "instances", instanceName) +} diff --git a/internal/context/context.go b/internal/context/context.go new file mode 100644 index 0000000..3dc5023 --- /dev/null +++ b/internal/context/context.go @@ -0,0 +1,140 @@ +package context + +import ( + "fmt" + "path/filepath" + "strings" + + "github.com/wild-cloud/wild-central/daemon/internal/storage" +) + +// Manager handles current instance context tracking +type Manager struct { + dataDir string +} + +// NewManager creates a new context manager +func NewManager(dataDir string) *Manager { + return &Manager{ + dataDir: dataDir, + } +} + +// GetContextFilePath returns the path to the context file +func (m *Manager) GetContextFilePath() string { + return filepath.Join(m.dataDir, "current-context") +} + +// GetCurrentContext retrieves the name of the current instance context +func (m *Manager) GetCurrentContext() (string, error) { + contextFile := m.GetContextFilePath() + + if !storage.FileExists(contextFile) { + return "", fmt.Errorf("no current context set") + } + + content, err := storage.ReadFile(contextFile) + if err != nil { + return "", fmt.Errorf("reading context file: %w", err) + } + + contextName := strings.TrimSpace(string(content)) + if contextName == "" { + return "", fmt.Errorf("context file is empty") + } + + return contextName, nil +} + +// SetCurrentContext sets the current instance context +func (m *Manager) SetCurrentContext(instanceName string) error { + if instanceName == "" { + return fmt.Errorf("instance name cannot be empty") + } + + // Verify instance exists + instancePath := filepath.Join(m.dataDir, "instances", instanceName) + if !storage.FileExists(instancePath) { + return fmt.Errorf("instance %s does not exist", instanceName) + } + + contextFile := m.GetContextFilePath() + + // Ensure data directory exists + if err := storage.EnsureDir(m.dataDir, 0755); err != nil { + return err + } + + // Acquire lock before writing + lockPath := contextFile + ".lock" + return storage.WithLock(lockPath, func() error { + return storage.WriteFile(contextFile, []byte(instanceName), 0644) + }) +} + +// ClearCurrentContext removes the current context +func (m *Manager) ClearCurrentContext() error { + contextFile := m.GetContextFilePath() + + if !storage.FileExists(contextFile) { + // 
Already cleared + return nil + } + + // Acquire lock before deleting + lockPath := contextFile + ".lock" + return storage.WithLock(lockPath, func() error { + return storage.WriteFile(contextFile, []byte(""), 0644) + }) +} + +// HasCurrentContext checks if a current context is set +func (m *Manager) HasCurrentContext() bool { + _, err := m.GetCurrentContext() + return err == nil +} + +// ValidateContext checks if the current context is valid (instance exists) +func (m *Manager) ValidateContext() error { + contextName, err := m.GetCurrentContext() + if err != nil { + return err + } + + instancePath := filepath.Join(m.dataDir, "instances", contextName) + if !storage.FileExists(instancePath) { + return fmt.Errorf("current context %s points to non-existent instance", contextName) + } + + return nil +} + +// GetCurrentInstancePath returns the path to the current instance directory +func (m *Manager) GetCurrentInstancePath() (string, error) { + contextName, err := m.GetCurrentContext() + if err != nil { + return "", err + } + + return filepath.Join(m.dataDir, "instances", contextName), nil +} + +// GetCurrentInstanceConfigPath returns the path to the current instance's config file +func (m *Manager) GetCurrentInstanceConfigPath() (string, error) { + instancePath, err := m.GetCurrentInstancePath() + if err != nil { + return "", err + } + + return filepath.Join(instancePath, "config.yaml"), nil +} + +// GetCurrentInstanceSecretsPath returns the path to the current instance's secrets file +func (m *Manager) GetCurrentInstanceSecretsPath() (string, error) { + instancePath, err := m.GetCurrentInstancePath() + if err != nil { + return "", err + } + + return filepath.Join(instancePath, "secrets.yaml"), nil +} diff --git a/internal/context/context_test.go b/internal/context/context_test.go new file mode 100644 index 0000000..0489c7e --- /dev/null +++ b/internal/context/context_test.go @@ -0,0 +1,100 @@ +package context + +import ( + "os" + "path/filepath" + "testing" +) + +func TestManager_GetSetCurrentContext(t *testing.T) { + tmpDir := t.TempDir() + m := NewManager(tmpDir) + + // Create test instances + instancesDir := filepath.Join(tmpDir, "instances") + instances := []string{"cloud1", "cloud2"} + for _, name := range instances { + instancePath := filepath.Join(instancesDir, name) + err := os.MkdirAll(instancePath, 0755) + if err != nil { + t.Fatalf("Failed to create instance dir: %v", err) + } + } + + // Initially should have no context + _, err := m.GetCurrentContext() + if err == nil { + t.Fatalf("Should have no context initially") + } + + // Set context + err = m.SetCurrentContext("cloud1") + if err != nil { + t.Fatalf("SetCurrentContext failed: %v", err) + } + + // Get context + ctx, err := m.GetCurrentContext() + if err != nil { + t.Fatalf("GetCurrentContext failed: %v", err) + } + if ctx != "cloud1" { + t.Errorf("Wrong context: got %q, want %q", ctx, "cloud1") + } + + // Change context + err = m.SetCurrentContext("cloud2") + if err != nil { + t.Fatalf("SetCurrentContext failed: %v", err) + } + + ctx, err = m.GetCurrentContext() + if err != nil { + t.Fatalf("GetCurrentContext failed: %v", err) + } + if ctx != "cloud2" { + t.Errorf("Wrong context: got %q, want %q", ctx, "cloud2") + } +} + +func TestManager_SetCurrentContext_ValidationError(t *testing.T) { + tmpDir := t.TempDir() + m := NewManager(tmpDir) + + // Trying to set context to non-existent instance should fail + err := m.SetCurrentContext("non-existent") + if err == nil { + t.Fatalf("SetCurrentContext should fail for non-existent 
instance") + } +} + +func TestManager_ClearCurrentContext(t *testing.T) { + tmpDir := t.TempDir() + m := NewManager(tmpDir) + + // Create test instance + instancesDir := filepath.Join(tmpDir, "instances") + instancePath := filepath.Join(instancesDir, "test-cloud") + err := os.MkdirAll(instancePath, 0755) + if err != nil { + t.Fatalf("Failed to create instance dir: %v", err) + } + + // Set context + err = m.SetCurrentContext("test-cloud") + if err != nil { + t.Fatalf("SetCurrentContext failed: %v", err) + } + + // Clear context + err = m.ClearCurrentContext() + if err != nil { + t.Fatalf("ClearCurrentContext failed: %v", err) + } + + // Context should be gone + _, err = m.GetCurrentContext() + if err == nil { + t.Fatalf("Context should be cleared") + } +} diff --git a/internal/data/paths.go b/internal/data/paths.go new file mode 100644 index 0000000..d0fcb7e --- /dev/null +++ b/internal/data/paths.go @@ -0,0 +1,105 @@ +package data + +import ( + "fmt" + "log" + "os" + "path/filepath" +) + +// Paths represents the data directory paths configuration +type Paths struct { + DataDir string + ConfigFile string + CloudDir string + LogsDir string + AssetsDir string + DnsmasqConf string +} + +// Manager handles data directory management +type Manager struct { + dataDir string + isDev bool +} + +// NewManager creates a new data manager +func NewManager() *Manager { + return &Manager{} +} + +func (m *Manager) Initialize() error { + m.isDev = m.isDevelopmentMode() + + var dataDir string + if m.isDev { + cwd, err := os.Getwd() + if err != nil { + return fmt.Errorf("failed to get current directory: %w", err) + } + if os.Getenv("WILD_CENTRAL_DATA") != "" { + dataDir = os.Getenv("WILD_CENTRAL_DATA") + } else { + dataDir = filepath.Join(cwd, "data") + } + log.Printf("Running in development mode, using data directory: %s", dataDir) + } else { + dataDir = "/var/lib/wild-cloud-central" + log.Printf("Running in production mode, using data directory: %s", dataDir) + } + + m.dataDir = dataDir + + // Create directory structure + paths := m.GetPaths() + + // Create all necessary directories + for _, dir := range []string{paths.DataDir, paths.LogsDir, paths.AssetsDir} { + if err := os.MkdirAll(dir, 0755); err != nil { + return fmt.Errorf("failed to create directory %s: %w", dir, err) + } + } + + log.Printf("Data directory structure initialized at: %s", dataDir) + return nil +} + +// isDevelopmentMode detects if we're running in development mode +func (m *Manager) isDevelopmentMode() bool { + // Check multiple indicators for development mode + + // 1. 
Check if WILD_CENTRAL_ENV is set to development
+	if env := os.Getenv("WILD_CENTRAL_ENV"); env == "development" {
+		return true
+	}
+
+	return false
+}
+
+// GetPaths returns the appropriate paths for the current environment
+func (m *Manager) GetPaths() Paths {
+	if m.isDev {
+		return Paths{
+			DataDir:     m.dataDir,
+			ConfigFile:  filepath.Join(m.dataDir, "config.yaml"),
+			CloudDir:    filepath.Join(m.dataDir, "clouds"),
+			LogsDir:     filepath.Join(m.dataDir, "logs"),
+			AssetsDir:   filepath.Join(m.dataDir, "assets"),
+			DnsmasqConf: filepath.Join(m.dataDir, "dnsmasq.conf"),
+		}
+	} else {
+		return Paths{
+			DataDir:     m.dataDir,
+			ConfigFile:  "/etc/wild-cloud/config.yaml",
+			CloudDir:    "/srv/wild-cloud",
+			LogsDir:     "/var/log/wild-cloud",
+			AssetsDir:   "/var/www/html/wild-cloud",
+			DnsmasqConf: "/etc/dnsmasq.conf",
+		}
+	}
+}
+
+// GetDataDir returns the current data directory
+func (m *Manager) GetDataDir() string {
+	return m.dataDir
+}
diff --git a/internal/discovery/discovery.go b/internal/discovery/discovery.go
new file mode 100644
index 0000000..e3bce54
--- /dev/null
+++ b/internal/discovery/discovery.go
@@ -0,0 +1,247 @@
+package discovery
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+	"sync"
+	"time"
+
+	"github.com/wild-cloud/wild-central/daemon/internal/node"
+	"github.com/wild-cloud/wild-central/daemon/internal/storage"
+	"github.com/wild-cloud/wild-central/daemon/internal/tools"
+)
+
+// Manager handles node discovery operations
+type Manager struct {
+	dataDir     string
+	nodeMgr     *node.Manager
+	talosctl    *tools.Talosctl
+	discoveryMu sync.Mutex
+}
+
+// NewManager creates a new discovery manager
+func NewManager(dataDir string, instanceName string) *Manager {
+	// Get talosconfig path for the instance
+	talosconfigPath := filepath.Join(dataDir, "instances", instanceName, "setup", "cluster-nodes", "generated", "talosconfig")
+
+	return &Manager{
+		dataDir:  dataDir,
+		nodeMgr:  node.NewManager(dataDir),
+		talosctl: tools.NewTalosconfigWithConfig(talosconfigPath),
+	}
+}
+
+// DiscoveredNode represents a discovered node on the network
+type DiscoveredNode struct {
+	IP              string   `json:"ip"`
+	Hostname        string   `json:"hostname,omitempty"`
+	MaintenanceMode bool     `json:"maintenance_mode"`
+	Version         string   `json:"version,omitempty"`
+	Interface       string   `json:"interface,omitempty"`
+	Disks           []string `json:"disks,omitempty"`
+}
+
+// DiscoveryStatus represents the current state of discovery
+type DiscoveryStatus struct {
+	Active     bool             `json:"active"`
+	StartedAt  time.Time        `json:"started_at,omitempty"`
+	NodesFound []DiscoveredNode `json:"nodes_found"`
+	Error      string           `json:"error,omitempty"`
+}
+
+// GetDiscoveryDir returns the discovery directory for an instance
+func (m *Manager) GetDiscoveryDir(instanceName string) string {
+	return filepath.Join(m.dataDir, "instances", instanceName, "discovery")
+}
+
+// GetDiscoveryStatusPath returns the path to discovery status file
+func (m *Manager) GetDiscoveryStatusPath(instanceName string) string {
+	return filepath.Join(m.GetDiscoveryDir(instanceName), "status.json")
+}
+
+// GetDiscoveryStatus returns current discovery operation status
+func (m *Manager) GetDiscoveryStatus(instanceName string) (*DiscoveryStatus, error) {
+	statusPath := m.GetDiscoveryStatusPath(instanceName)
+
+	if !storage.FileExists(statusPath) {
+		// No discovery has been run yet
+		return &DiscoveryStatus{
+			Active:     false,
+			NodesFound: []DiscoveredNode{},
+		}, nil
+	}
+
+	data, err := os.ReadFile(statusPath)
+	if err != nil {
+		return nil, fmt.Errorf("failed to read discovery status: %w",
err) + } + + var status DiscoveryStatus + if err := json.Unmarshal(data, &status); err != nil { + return nil, fmt.Errorf("failed to parse discovery status: %w", err) + } + + return &status, nil +} + +// StartDiscovery initiates an async discovery operation +func (m *Manager) StartDiscovery(instanceName string, ipList []string) error { + m.discoveryMu.Lock() + defer m.discoveryMu.Unlock() + + // Check if discovery is already running + status, err := m.GetDiscoveryStatus(instanceName) + if err != nil { + return err + } + + if status.Active { + return fmt.Errorf("discovery already in progress") + } + + // Initialize discovery status + newStatus := &DiscoveryStatus{ + Active: true, + StartedAt: time.Now(), + NodesFound: []DiscoveredNode{}, + } + + if err := m.writeDiscoveryStatus(instanceName, newStatus); err != nil { + return err + } + + // Start discovery in background + go m.runDiscovery(instanceName, ipList) + + return nil +} + +// runDiscovery performs the actual discovery operation +func (m *Manager) runDiscovery(instanceName string, ipList []string) { + defer func() { + // Mark discovery as complete + m.discoveryMu.Lock() + defer m.discoveryMu.Unlock() + + status, _ := m.GetDiscoveryStatus(instanceName) + status.Active = false + m.writeDiscoveryStatus(instanceName, status) + }() + + // Discover nodes by probing each IP + discoveredNodes := []DiscoveredNode{} + + for _, ip := range ipList { + node, err := m.probeNode(ip) + if err != nil { + // Node not reachable or not a Talos node + continue + } + + discoveredNodes = append(discoveredNodes, *node) + + // Update status incrementally + m.discoveryMu.Lock() + status, _ := m.GetDiscoveryStatus(instanceName) + status.NodesFound = discoveredNodes + m.writeDiscoveryStatus(instanceName, status) + m.discoveryMu.Unlock() + } +} + +// probeNode attempts to detect if a node is running Talos +func (m *Manager) probeNode(ip string) (*DiscoveredNode, error) { + // Attempt to get version (quick connectivity test) + version, err := m.talosctl.GetVersion(ip, false) + if err != nil { + return nil, err + } + + // Node is reachable, get hardware info + hwInfo, err := m.nodeMgr.DetectHardware(ip) + if err != nil { + // Still count it as discovered even if we can't get full hardware + return &DiscoveredNode{ + IP: ip, + MaintenanceMode: false, + Version: version, + }, nil + } + + // Extract just the disk paths for discovery output + diskPaths := make([]string, len(hwInfo.Disks)) + for i, disk := range hwInfo.Disks { + diskPaths[i] = disk.Path + } + + return &DiscoveredNode{ + IP: ip, + MaintenanceMode: hwInfo.MaintenanceMode, + Version: version, + Interface: hwInfo.Interface, + Disks: diskPaths, + }, nil +} + +// DiscoverNodes performs synchronous discovery (for simple cases) +func (m *Manager) DiscoverNodes(instanceName string, ipList []string) ([]DiscoveredNode, error) { + nodes := []DiscoveredNode{} + + for _, ip := range ipList { + node, err := m.probeNode(ip) + if err != nil { + // Skip unreachable nodes + continue + } + nodes = append(nodes, *node) + } + + // Save results + status := &DiscoveryStatus{ + Active: false, + StartedAt: time.Now(), + NodesFound: nodes, + } + + if err := m.writeDiscoveryStatus(instanceName, status); err != nil { + return nodes, err // Return nodes even if we can't save status + } + + return nodes, nil +} + +// ClearDiscoveryStatus removes discovery status file +func (m *Manager) ClearDiscoveryStatus(instanceName string) error { + statusPath := m.GetDiscoveryStatusPath(instanceName) + + if !storage.FileExists(statusPath) { + 
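+		// A missing status file just means no discovery has run yet (or it
+		// was already cleared), i.e. <dataDir>/instances/<name>/discovery/status.json
+		// was never written; returning nil keeps this operation idempotent.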
return nil // Already cleared, idempotent + } + + return os.Remove(statusPath) +} + +// writeDiscoveryStatus writes discovery status to disk +func (m *Manager) writeDiscoveryStatus(instanceName string, status *DiscoveryStatus) error { + discoveryDir := m.GetDiscoveryDir(instanceName) + + // Ensure directory exists + if err := storage.EnsureDir(discoveryDir, 0755); err != nil { + return err + } + + statusPath := m.GetDiscoveryStatusPath(instanceName) + + data, err := json.MarshalIndent(status, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal discovery status: %w", err) + } + + if err := storage.WriteFile(statusPath, data, 0644); err != nil { + return fmt.Errorf("failed to write discovery status: %w", err) + } + + return nil +} diff --git a/internal/dnsmasq/config.go b/internal/dnsmasq/config.go new file mode 100644 index 0000000..3979ffb --- /dev/null +++ b/internal/dnsmasq/config.go @@ -0,0 +1,73 @@ +package dnsmasq + +import ( + "fmt" + "log" + "os" + "os/exec" + + "github.com/wild-cloud/wild-central/daemon/internal/config" +) + +// ConfigGenerator handles dnsmasq configuration generation +type ConfigGenerator struct{} + +// NewConfigGenerator creates a new dnsmasq config generator +func NewConfigGenerator() *ConfigGenerator { + return &ConfigGenerator{} +} + +// Generate creates a dnsmasq configuration from the app config +func (g *ConfigGenerator) Generate(cfg *config.GlobalConfig, clouds []config.InstanceConfig) string { + + resolution_section := "" + for _, cloud := range clouds { + resolution_section += fmt.Sprintf("local=/%s/\naddress=/%s/%s\n", cloud.Domain, cloud.Domain, cfg.Cluster.EndpointIP) + resolution_section += fmt.Sprintf("local=/%s/\naddress=/%s/%s\n", cloud.InternalDomain, cloud.InternalDomain, cfg.Cluster.EndpointIP) + } + + template := `# Configuration file for dnsmasq. 
+ +# Basic Settings +interface=%s +listen-address=%s +domain-needed +bogus-priv +no-resolv + +# DNS Local Resolution - Central server handles these domains authoritatively +%s +server=1.1.1.1 +server=8.8.8.8 + +log-queries +log-dhcp +` + + return fmt.Sprintf(template, + cfg.Cloud.Dnsmasq.Interface, + cfg.Cloud.DNS.IP, + resolution_section, + ) +} + +// WriteConfig writes the dnsmasq configuration to the specified path +func (g *ConfigGenerator) WriteConfig(cfg *config.GlobalConfig, clouds []config.InstanceConfig, configPath string) error { + configContent := g.Generate(cfg, clouds) + + log.Printf("Writing dnsmasq config to: %s", configPath) + if err := os.WriteFile(configPath, []byte(configContent), 0644); err != nil { + return fmt.Errorf("writing dnsmasq config: %w", err) + } + + return nil +} + +// RestartService restarts the dnsmasq service +func (g *ConfigGenerator) RestartService() error { + cmd := exec.Command("sudo", "/usr/bin/systemctl", "restart", "dnsmasq.service") + if err := cmd.Run(); err != nil { + return fmt.Errorf("failed to restart dnsmasq: %w", err) + } + return nil +} diff --git a/internal/instance/instance.go b/internal/instance/instance.go new file mode 100644 index 0000000..b99b67c --- /dev/null +++ b/internal/instance/instance.go @@ -0,0 +1,251 @@ +package instance + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/wild-cloud/wild-central/daemon/internal/config" + "github.com/wild-cloud/wild-central/daemon/internal/context" + "github.com/wild-cloud/wild-central/daemon/internal/secrets" + "github.com/wild-cloud/wild-central/daemon/internal/storage" +) + +// Manager handles instance lifecycle operations +type Manager struct { + dataDir string + configMgr *config.Manager + secretsMgr *secrets.Manager + contextMgr *context.Manager +} + +// NewManager creates a new instance manager +func NewManager(dataDir string) *Manager { + return &Manager{ + dataDir: dataDir, + configMgr: config.NewManager(), + secretsMgr: secrets.NewManager(), + contextMgr: context.NewManager(dataDir), + } +} + +// Instance represents a Wild Cloud instance +type Instance struct { + Name string + Path string + ConfigPath string + SecretsPath string +} + +// GetInstancePath returns the path to an instance directory +func (m *Manager) GetInstancePath(name string) string { + return filepath.Join(m.dataDir, "instances", name) +} + +// GetInstanceConfigPath returns the path to an instance's config file +func (m *Manager) GetInstanceConfigPath(name string) string { + return filepath.Join(m.GetInstancePath(name), "config.yaml") +} + +// GetInstanceSecretsPath returns the path to an instance's secrets file +func (m *Manager) GetInstanceSecretsPath(name string) string { + return filepath.Join(m.GetInstancePath(name), "secrets.yaml") +} + +// InstanceExists checks if an instance exists +func (m *Manager) InstanceExists(name string) bool { + return storage.FileExists(m.GetInstancePath(name)) +} + +// CreateInstance creates a new Wild Cloud instance with initial structure +func (m *Manager) CreateInstance(name string) error { + if name == "" { + return fmt.Errorf("instance name cannot be empty") + } + + instancePath := m.GetInstancePath(name) + + // Check if instance already exists (idempotency - just return success) + if m.InstanceExists(name) { + return nil + } + + // Acquire lock for instance creation + lockPath := filepath.Join(m.dataDir, "instances", ".lock") + return storage.WithLock(lockPath, func() error { + // Create instance directory + if err := storage.EnsureDir(instancePath, 0755); err != 
nil { + return fmt.Errorf("creating instance directory: %w", err) + } + + // Create config file + if err := m.configMgr.EnsureInstanceConfig(instancePath); err != nil { + return fmt.Errorf("creating config file: %w", err) + } + + // Create secrets file + if err := m.secretsMgr.EnsureSecretsFile(instancePath); err != nil { + return fmt.Errorf("creating secrets file: %w", err) + } + + // Create subdirectories + subdirs := []string{"talos", "k8s", "logs", "backups"} + for _, subdir := range subdirs { + subdirPath := filepath.Join(instancePath, subdir) + if err := storage.EnsureDir(subdirPath, 0755); err != nil { + return fmt.Errorf("creating subdirectory %s: %w", subdir, err) + } + } + + return nil + }) +} + +// DeleteInstance removes a Wild Cloud instance +func (m *Manager) DeleteInstance(name string) error { + if name == "" { + return fmt.Errorf("instance name cannot be empty") + } + + instancePath := m.GetInstancePath(name) + + // Check if instance exists + if !m.InstanceExists(name) { + return fmt.Errorf("instance %s does not exist", name) + } + + // Clear context if this is the current instance + currentContext, err := m.contextMgr.GetCurrentContext() + if err == nil && currentContext == name { + if err := m.contextMgr.ClearCurrentContext(); err != nil { + return fmt.Errorf("clearing current context: %w", err) + } + } + + // Acquire lock for instance deletion + lockPath := filepath.Join(m.dataDir, "instances", ".lock") + return storage.WithLock(lockPath, func() error { + // Remove instance directory + if err := os.RemoveAll(instancePath); err != nil { + return fmt.Errorf("removing instance directory: %w", err) + } + + return nil + }) +} + +// ListInstances returns a list of all instance names +func (m *Manager) ListInstances() ([]string, error) { + instancesDir := filepath.Join(m.dataDir, "instances") + + // Ensure instances directory exists + if !storage.FileExists(instancesDir) { + return []string{}, nil + } + + entries, err := os.ReadDir(instancesDir) + if err != nil { + return nil, fmt.Errorf("reading instances directory: %w", err) + } + + var instances []string + for _, entry := range entries { + if entry.IsDir() && entry.Name() != ".lock" { + instances = append(instances, entry.Name()) + } + } + + return instances, nil +} + +// GetInstance retrieves instance information +func (m *Manager) GetInstance(name string) (*Instance, error) { + if !m.InstanceExists(name) { + return nil, fmt.Errorf("instance %s does not exist", name) + } + + return &Instance{ + Name: name, + Path: m.GetInstancePath(name), + ConfigPath: m.GetInstanceConfigPath(name), + SecretsPath: m.GetInstanceSecretsPath(name), + }, nil +} + +// GetCurrentInstance returns the current context instance +func (m *Manager) GetCurrentInstance() (*Instance, error) { + name, err := m.contextMgr.GetCurrentContext() + if err != nil { + return nil, err + } + + return m.GetInstance(name) +} + +// SetCurrentInstance sets the current instance context +func (m *Manager) SetCurrentInstance(name string) error { + if !m.InstanceExists(name) { + return fmt.Errorf("instance %s does not exist", name) + } + + return m.contextMgr.SetCurrentContext(name) +} + +// ValidateInstance checks if an instance has valid structure +func (m *Manager) ValidateInstance(name string) error { + if !m.InstanceExists(name) { + return fmt.Errorf("instance %s does not exist", name) + } + + instance, err := m.GetInstance(name) + if err != nil { + return err + } + + // Check config file exists and is valid + if !storage.FileExists(instance.ConfigPath) { + return 
fmt.Errorf("config file missing for instance %s", name) + } + + if err := m.configMgr.ValidateConfig(instance.ConfigPath); err != nil { + return fmt.Errorf("invalid config for instance %s: %w", name, err) + } + + // Check secrets file exists with proper permissions + if !storage.FileExists(instance.SecretsPath) { + return fmt.Errorf("secrets file missing for instance %s", name) + } + + // Verify secrets file permissions + info, err := os.Stat(instance.SecretsPath) + if err != nil { + return fmt.Errorf("checking secrets file permissions: %w", err) + } + + if info.Mode().Perm() != 0600 { + return fmt.Errorf("secrets file has incorrect permissions (expected 0600, got %04o)", info.Mode().Perm()) + } + + return nil +} + +// InitializeInstance performs initial setup for a newly created instance +func (m *Manager) InitializeInstance(name string, initialConfig map[string]string) error { + if !m.InstanceExists(name) { + return fmt.Errorf("instance %s does not exist", name) + } + + instance, err := m.GetInstance(name) + if err != nil { + return err + } + + // Set initial config values + for key, value := range initialConfig { + if err := m.configMgr.SetConfigValue(instance.ConfigPath, key, value); err != nil { + return fmt.Errorf("setting config value %s: %w", key, err) + } + } + + return nil +} diff --git a/internal/instance/instance_test.go b/internal/instance/instance_test.go new file mode 100644 index 0000000..46931c4 --- /dev/null +++ b/internal/instance/instance_test.go @@ -0,0 +1,176 @@ +package instance + +import ( + "os" + "path/filepath" + "testing" +) + +func TestManager_CreateInstance(t *testing.T) { + tmpDir := t.TempDir() + m := NewManager(tmpDir) + + instanceName := "test-cloud" + + // Create instance + err := m.CreateInstance(instanceName) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + // Verify instance directory structure + instancePath := m.GetInstancePath(instanceName) + expectedDirs := []string{ + instancePath, + filepath.Join(instancePath, "talos"), + filepath.Join(instancePath, "k8s"), + filepath.Join(instancePath, "logs"), + filepath.Join(instancePath, "backups"), + } + + for _, dir := range expectedDirs { + info, err := os.Stat(dir) + if err != nil { + t.Errorf("Directory not created: %s: %v", dir, err) + continue + } + if !info.IsDir() { + t.Errorf("Path is not a directory: %s", dir) + } + } + + // Verify config.yaml exists + configPath := m.GetInstanceConfigPath(instanceName) + if _, err := os.Stat(configPath); err != nil { + t.Errorf("Config file not created: %v", err) + } + + // Verify secrets.yaml exists with correct permissions + secretsPath := m.GetInstanceSecretsPath(instanceName) + info, err := os.Stat(secretsPath) + if err != nil { + t.Errorf("Secrets file not created: %v", err) + } else { + // Check permissions (should be 0600) + mode := info.Mode().Perm() + if mode != 0600 { + t.Errorf("Secrets file has wrong permissions: got %o, want 0600", mode) + } + } + + // Test idempotency - creating again should not error + err = m.CreateInstance(instanceName) + if err != nil { + t.Fatalf("CreateInstance not idempotent: %v", err) + } +} + +func TestManager_ListInstances(t *testing.T) { + tmpDir := t.TempDir() + m := NewManager(tmpDir) + + // Initially should be empty + instances, err := m.ListInstances() + if err != nil { + t.Fatalf("ListInstances failed: %v", err) + } + if len(instances) != 0 { + t.Fatalf("Expected 0 instances, got %d", len(instances)) + } + + // Create instances + instanceNames := []string{"cloud1", "cloud2", "cloud3"} + for _, 
name := range instanceNames { + err := m.CreateInstance(name) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + } + + // List should return all instances + instances, err = m.ListInstances() + if err != nil { + t.Fatalf("ListInstances failed: %v", err) + } + if len(instances) != len(instanceNames) { + t.Fatalf("Expected %d instances, got %d", len(instanceNames), len(instances)) + } + + // Verify all expected instances are present + instanceMap := make(map[string]bool) + for _, name := range instances { + instanceMap[name] = true + } + for _, expected := range instanceNames { + if !instanceMap[expected] { + t.Errorf("Expected instance %q not found", expected) + } + } +} + +func TestManager_DeleteInstance(t *testing.T) { + tmpDir := t.TempDir() + m := NewManager(tmpDir) + + instanceName := "test-cloud" + + // Create instance + err := m.CreateInstance(instanceName) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + // Verify it exists (by checking directory) + instancePath := m.GetInstancePath(instanceName) + if _, err := os.Stat(instancePath); err != nil { + t.Fatalf("Instance should exist: %v", err) + } + + // Delete instance + err = m.DeleteInstance(instanceName) + if err != nil { + t.Fatalf("DeleteInstance failed: %v", err) + } + + // Verify it's gone + err = m.ValidateInstance(instanceName) + if err == nil { + t.Fatalf("Instance should not exist after deletion") + } + + // Deleting non-existent instance should error + err = m.DeleteInstance(instanceName) + if err == nil { + t.Fatalf("Deleting non-existent instance should error") + } +} + +func TestManager_ValidateInstance(t *testing.T) { + tmpDir := t.TempDir() + m := NewManager(tmpDir) + + instanceName := "test-cloud" + + // Should fail for non-existent instance + err := m.ValidateInstance(instanceName) + if err == nil { + t.Fatalf("ValidateInstance should fail for non-existent instance") + } + + // Create instance + err = m.CreateInstance(instanceName) + if err != nil { + t.Fatalf("CreateInstance failed: %v", err) + } + + // Should succeed for existing instance (if yq is available) + // Note: ValidateInstance requires yq for config validation + err = m.ValidateInstance(instanceName) + if err != nil { + // It's OK if yq is not installed, just check instance exists + if !m.InstanceExists(instanceName) { + t.Fatalf("Instance should exist after creation") + } + t.Logf("ValidateInstance failed (likely yq not installed): %v", err) + } +} diff --git a/internal/node/node.go b/internal/node/node.go new file mode 100644 index 0000000..87e3408 --- /dev/null +++ b/internal/node/node.go @@ -0,0 +1,668 @@ +package node + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "github.com/wild-cloud/wild-central/daemon/internal/config" + "github.com/wild-cloud/wild-central/daemon/internal/tools" +) + +// Manager handles node configuration and state management +type Manager struct { + dataDir string + configMgr *config.Manager + talosctl *tools.Talosctl +} + +// NewManager creates a new node manager +func NewManager(dataDir string) *Manager { + return &Manager{ + dataDir: dataDir, + configMgr: config.NewManager(), + talosctl: tools.NewTalosctl(), + } +} + +// Node represents a cluster node configuration +type Node struct { + Hostname string `yaml:"hostname" json:"hostname"` + Role string `yaml:"role" json:"role"` // controlplane or worker + TargetIP string `yaml:"targetIp" json:"target_ip"` + CurrentIP string `yaml:"currentIp,omitempty" json:"current_ip,omitempty"` // For maintenance mode detection 
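+	// For illustration, a node of this shape is stored under
+	// cluster.nodes.active in config.yaml, keyed by hostname and using the
+	// yaml tags declared on these fields (values below are hypothetical):
+	//
+	//   cluster:
+	//     nodes:
+	//       active:
+	//         node-1:
+	//           role: controlplane
+	//           targetIp: 192.168.8.31
+	//           currentIp: 192.168.8.201
+	//           disk: /dev/nvme0n1
+	//           maintenance: true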
+ Interface string `yaml:"interface,omitempty" json:"interface,omitempty"` + Disk string `yaml:"disk" json:"disk"` + Version string `yaml:"version,omitempty" json:"version,omitempty"` + SchematicID string `yaml:"schematicId,omitempty" json:"schematic_id,omitempty"` + Maintenance bool `yaml:"maintenance,omitempty" json:"maintenance"` // Explicit maintenance mode flag + Configured bool `yaml:"configured,omitempty" json:"configured"` + Applied bool `yaml:"applied,omitempty" json:"applied"` +} + +// HardwareInfo contains discovered hardware information +type HardwareInfo struct { + IP string `json:"ip"` + Interface string `json:"interface"` + Disks []tools.DiskInfo `json:"disks"` + SelectedDisk string `json:"selected_disk"` + MaintenanceMode bool `json:"maintenance_mode"` +} + +// ApplyOptions contains options for node apply +type ApplyOptions struct { + // No options needed - apply always regenerates and auto-fetches templates +} + +// GetInstancePath returns the path to an instance's nodes directory +func (m *Manager) GetInstancePath(instanceName string) string { + return filepath.Join(m.dataDir, "instances", instanceName) +} + +// List returns all nodes for an instance +func (m *Manager) List(instanceName string) ([]Node, error) { + instancePath := m.GetInstancePath(instanceName) + configPath := filepath.Join(instancePath, "config.yaml") + + yq := tools.NewYQ() + + // Get all node hostnames from cluster.nodes.active + output, err := yq.Exec("eval", ".cluster.nodes.active | keys", configPath) + if err != nil { + return nil, fmt.Errorf("failed to read nodes: %w", err) + } + + // Parse hostnames (yq returns YAML array) + hostnamesYAML := string(output) + if hostnamesYAML == "" || hostnamesYAML == "null\n" { + return []Node{}, nil + } + + // Get hostnames line by line + var hostnames []string + for _, line := range strings.Split(hostnamesYAML, "\n") { + line = strings.TrimSpace(line) + if line != "" && line != "null" && line != "-" { + // Remove leading "- " from YAML array + hostname := line + if len(hostname) > 2 && hostname[0:2] == "- " { + hostname = hostname[2:] + } + if hostname != "" { + hostnames = append(hostnames, hostname) + } + } + } + + // Get details for each node + var nodes []Node + for _, hostname := range hostnames { + basePath := fmt.Sprintf(".cluster.nodes.active.%s", hostname) + + // Get node fields + role, _ := yq.Exec("eval", basePath+".role", configPath) + targetIP, _ := yq.Exec("eval", basePath+".targetIp", configPath) + currentIP, _ := yq.Exec("eval", basePath+".currentIp", configPath) + disk, _ := yq.Exec("eval", basePath+".disk", configPath) + iface, _ := yq.Exec("eval", basePath+".interface", configPath) + version, _ := yq.Exec("eval", basePath+".version", configPath) + schematicID, _ := yq.Exec("eval", basePath+".schematicId", configPath) + maintenance, _ := yq.Exec("eval", basePath+".maintenance", configPath) + configured, _ := yq.Exec("eval", basePath+".configured", configPath) + applied, _ := yq.Exec("eval", basePath+".applied", configPath) + + node := Node{ + Hostname: hostname, + Role: tools.CleanYQOutput(string(role)), + TargetIP: tools.CleanYQOutput(string(targetIP)), + CurrentIP: tools.CleanYQOutput(string(currentIP)), + Disk: tools.CleanYQOutput(string(disk)), + Interface: tools.CleanYQOutput(string(iface)), + Version: tools.CleanYQOutput(string(version)), + SchematicID: tools.CleanYQOutput(string(schematicID)), + Maintenance: tools.CleanYQOutput(string(maintenance)) == "true", + Configured: tools.CleanYQOutput(string(configured)) == "true", + Applied: 
tools.CleanYQOutput(string(applied)) == "true", + } + + nodes = append(nodes, node) + } + + return nodes, nil +} + +// Get returns a specific node by hostname +func (m *Manager) Get(instanceName, hostname string) (*Node, error) { + // Get all nodes + nodes, err := m.List(instanceName) + if err != nil { + return nil, err + } + + // Find node by hostname + for _, node := range nodes { + if node.Hostname == hostname { + return &node, nil + } + } + + return nil, fmt.Errorf("node %s not found", hostname) +} + +// Add registers a new node in config.yaml +func (m *Manager) Add(instanceName string, node *Node) error { + instancePath := m.GetInstancePath(instanceName) + + // Validate node data + if node.Hostname == "" { + return fmt.Errorf("hostname is required") + } + if node.Role != "controlplane" && node.Role != "worker" { + return fmt.Errorf("role must be 'controlplane' or 'worker'") + } + if node.Disk == "" { + return fmt.Errorf("disk is required") + } + + // Check if node already exists - ERROR if yes + existing, err := m.Get(instanceName, node.Hostname) + if err == nil && existing != nil { + return fmt.Errorf("node %s already exists", node.Hostname) + } + + configPath := filepath.Join(instancePath, "config.yaml") + yq := tools.NewYQ() + + // If schematicId not provided, use instance-level default from cluster.nodes.talos.schematicId + if node.SchematicID == "" { + defaultSchematicID, err := yq.Get(configPath, ".cluster.nodes.talos.schematicId") + if err == nil && defaultSchematicID != "" && defaultSchematicID != "null" { + node.SchematicID = defaultSchematicID + } + } + + // If version not provided, use instance-level default from cluster.nodes.talos.version + if node.Version == "" { + defaultVersion, err := yq.Get(configPath, ".cluster.nodes.talos.version") + if err == nil && defaultVersion != "" && defaultVersion != "null" { + node.Version = defaultVersion + } + } + + // Set maintenance=true if currentIP provided (node in maintenance mode) + if node.CurrentIP != "" { + node.Maintenance = true + } + + // Add node to config.yaml + // Path: cluster.nodes.active.{hostname} + basePath := fmt.Sprintf("cluster.nodes.active.%s", node.Hostname) + + // Set each field + if err := yq.Set(configPath, basePath+".role", node.Role); err != nil { + return fmt.Errorf("failed to set role: %w", err) + } + if err := yq.Set(configPath, basePath+".disk", node.Disk); err != nil { + return fmt.Errorf("failed to set disk: %w", err) + } + if node.TargetIP != "" { + if err := yq.Set(configPath, basePath+".targetIp", node.TargetIP); err != nil { + return fmt.Errorf("failed to set targetIP: %w", err) + } + } + if node.CurrentIP != "" { + if err := yq.Set(configPath, basePath+".currentIp", node.CurrentIP); err != nil { + return fmt.Errorf("failed to set currentIP: %w", err) + } + } + if node.Interface != "" { + if err := yq.Set(configPath, basePath+".interface", node.Interface); err != nil { + return fmt.Errorf("failed to set interface: %w", err) + } + } + if node.Version != "" { + if err := yq.Set(configPath, basePath+".version", node.Version); err != nil { + return fmt.Errorf("failed to set version: %w", err) + } + } + if node.SchematicID != "" { + if err := yq.Set(configPath, basePath+".schematicId", node.SchematicID); err != nil { + return fmt.Errorf("failed to set schematicId: %w", err) + } + } + if node.Maintenance { + if err := yq.Set(configPath, basePath+".maintenance", "true"); err != nil { + return fmt.Errorf("failed to set maintenance: %w", err) + } + } + + return nil +} + +// Delete removes a node from 
config.yaml
+func (m *Manager) Delete(instanceName, nodeIdentifier string) error {
+	// Get node to find hostname
+	node, err := m.Get(instanceName, nodeIdentifier)
+	if err != nil {
+		return err
+	}
+
+	instancePath := m.GetInstancePath(instanceName)
+	configPath := filepath.Join(instancePath, "config.yaml")
+
+	// Delete node from config.yaml
+	// Path: cluster.nodes.active.{hostname}
+	nodePath := fmt.Sprintf("cluster.nodes.active.%s", node.Hostname)
+
+	yq := tools.NewYQ()
+	// Use yq to delete the node (note the leading dot required by yq path syntax)
+	_, err = yq.Exec("eval", "-i", fmt.Sprintf("del(.%s)", nodePath), configPath)
+	if err != nil {
+		return fmt.Errorf("failed to delete node: %w", err)
+	}
+
+	return nil
+}
+
+// DetectHardware queries node hardware information via talosctl
+func (m *Manager) DetectHardware(nodeIP string) (*HardwareInfo, error) {
+	// Query node with insecure flag (maintenance mode)
+	insecure := true
+
+	// Try to get default interface (with default route)
+	iface, err := m.talosctl.GetDefaultInterface(nodeIP, insecure)
+	if err != nil {
+		// Fall back to physical interface
+		iface, err = m.talosctl.GetPhysicalInterface(nodeIP, insecure)
+		if err != nil {
+			return nil, fmt.Errorf("failed to detect interface: %w", err)
+		}
+	}
+
+	// Get disks
+	disks, err := m.talosctl.GetDisks(nodeIP, insecure)
+	if err != nil {
+		return nil, fmt.Errorf("failed to detect disks: %w", err)
+	}
+
+	// Select first disk as default
+	var selectedDisk string
+	if len(disks) > 0 {
+		selectedDisk = disks[0].Path
+	}
+
+	return &HardwareInfo{
+		IP:              nodeIP,
+		Interface:       iface,
+		Disks:           disks,
+		SelectedDisk:    selectedDisk,
+		MaintenanceMode: true,
+	}, nil
+}
+
+// Apply generates configuration and applies it to node
+// This follows the wild-node-apply flow:
+// 1. Auto-fetch templates if missing
+// 2. Generate node-specific patch file from template
+// 3. Merge base config + patch → final config (talosctl machineconfig patch)
+// 4. Apply final config to node (talosctl apply-config --insecure if maintenance mode)
+// 5. 
Update state: currentIP=targetIP, maintenance=false, applied=true +func (m *Manager) Apply(instanceName, nodeIdentifier string, opts ApplyOptions) error { + // Get node configuration + node, err := m.Get(instanceName, nodeIdentifier) + if err != nil { + return err + } + + instancePath := m.GetInstancePath(instanceName) + setupDir := filepath.Join(instancePath, "setup", "cluster-nodes") + configPath := filepath.Join(instancePath, "config.yaml") + yq := tools.NewYQ() + + // Ensure node has version and schematicId (use cluster defaults if missing) + if node.Version == "" { + defaultVersion, err := yq.Get(configPath, ".cluster.nodes.talos.version") + if err == nil && defaultVersion != "" && defaultVersion != "null" { + node.Version = defaultVersion + } + } + if node.SchematicID == "" { + defaultSchematicID, err := yq.Get(configPath, ".cluster.nodes.talos.schematicId") + if err == nil && defaultSchematicID != "" && defaultSchematicID != "null" { + node.SchematicID = defaultSchematicID + } + } + + // Always auto-fetch templates if they don't exist + templatesDir := filepath.Join(setupDir, "patch.templates") + if !m.templatesExist(templatesDir) { + if err := m.copyTemplatesFromDirectory(templatesDir); err != nil { + return fmt.Errorf("failed to copy templates: %w", err) + } + } + + // Determine base configuration file (generated by cluster config generation) + var baseConfig string + baseConfigDir := filepath.Join(instancePath, "talos", "generated") + if node.Role == "controlplane" { + baseConfig = filepath.Join(baseConfigDir, "controlplane.yaml") + } else { + baseConfig = filepath.Join(baseConfigDir, "worker.yaml") + } + + // Check if base config exists + if _, err := os.Stat(baseConfig); err != nil { + return fmt.Errorf("base configuration not found: %s (run cluster config generation first)", baseConfig) + } + + // Generate node-specific patch file + patchFile, err := m.generateNodePatch(instanceName, node, setupDir) + if err != nil { + return fmt.Errorf("failed to generate node patch: %w", err) + } + + // Generate final machine configuration (base + patch) + finalConfig, err := m.generateFinalConfig(node, baseConfig, patchFile, setupDir) + if err != nil { + return fmt.Errorf("failed to generate final configuration: %w", err) + } + + // Mark as configured + node.Configured = true + if err := m.updateNodeStatus(instanceName, node); err != nil { + return fmt.Errorf("failed to update node status: %w", err) + } + + // Apply configuration to node + // Determine which IP to use and whether node is in maintenance mode + // + // Three scenarios: + // 1. Production node (currentIP empty/same, maintenance=false): use targetIP, no --insecure + // 2. IP changing (currentIP != targetIP): use currentIP, --insecure (always maintenance) + // 3. 
Maintenance at target (maintenance=true, no IP change): use targetIP, --insecure + var deployIP string + var maintenanceMode bool + + if node.CurrentIP != "" && node.CurrentIP != node.TargetIP { + // Scenario 2: IP is changing - node is at currentIP, moving to targetIP + deployIP = node.CurrentIP + maintenanceMode = true + } else if node.Maintenance { + // Scenario 3: Explicit maintenance mode, no IP change + deployIP = node.TargetIP + maintenanceMode = true + } else { + // Scenario 1: Production node at target IP + deployIP = node.TargetIP + maintenanceMode = false + } + + // Apply config + talosconfigPath := tools.GetTalosconfigPath(m.dataDir, instanceName) + if err := m.talosctl.ApplyConfig(deployIP, finalConfig, maintenanceMode, talosconfigPath); err != nil { + return fmt.Errorf("failed to apply config to %s: %w", deployIP, err) + } + + // Post-application updates: move to production IP, exit maintenance mode + node.Applied = true + node.CurrentIP = node.TargetIP // Node now on production IP + node.Maintenance = false // Exit maintenance mode + if err := m.updateNodeStatus(instanceName, node); err != nil { + return fmt.Errorf("failed to update node status: %w", err) + } + + return nil +} + +// generateNodePatch creates a node-specific patch file from template +func (m *Manager) generateNodePatch(instanceName string, node *Node, setupDir string) (string, error) { + // Determine template file based on role + var templateFile string + if node.Role == "controlplane" { + templateFile = filepath.Join(setupDir, "patch.templates", "controlplane.yaml") + } else { + templateFile = filepath.Join(setupDir, "patch.templates", "worker.yaml") + } + + // Read template + templateContent, err := os.ReadFile(templateFile) + if err != nil { + return "", fmt.Errorf("failed to read template %s: %w", templateFile, err) + } + + // Stage 1: Apply simple variable substitutions (like v.PoC does with sed) + patchContent := string(templateContent) + patchContent = strings.ReplaceAll(patchContent, "{{NODE_NAME}}", node.Hostname) + patchContent = strings.ReplaceAll(patchContent, "{{NODE_IP}}", node.TargetIP) + patchContent = strings.ReplaceAll(patchContent, "{{SCHEMATIC_ID}}", node.SchematicID) + patchContent = strings.ReplaceAll(patchContent, "{{VERSION}}", node.Version) + + // Stage 2: Process through gomplate with config.yaml context (like v.PoC does with wild-compile-template) + instancePath := m.GetInstancePath(instanceName) + configPath := filepath.Join(instancePath, "config.yaml") + + // Use gomplate to process template with config context + cmd := exec.Command("gomplate", "-c", ".="+configPath) + cmd.Stdin = strings.NewReader(patchContent) + + output, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to process template with gomplate: %w\nOutput: %s", err, string(output)) + } + + processedPatch := string(output) + + // Create patch directory + patchDir := filepath.Join(setupDir, "patch") + if err := os.MkdirAll(patchDir, 0755); err != nil { + return "", fmt.Errorf("failed to create patch directory: %w", err) + } + + // Write patch file + patchFile := filepath.Join(patchDir, node.Hostname+".yaml") + if err := os.WriteFile(patchFile, []byte(processedPatch), 0644); err != nil { + return "", fmt.Errorf("failed to write patch file: %w", err) + } + + return patchFile, nil +} + +// generateFinalConfig merges base config + patch to create final machine config +func (m *Manager) generateFinalConfig(node *Node, baseConfig, patchFile, setupDir string) (string, error) { + // Create final 
config directory + finalDir := filepath.Join(setupDir, "final") + if err := os.MkdirAll(finalDir, 0755); err != nil { + return "", fmt.Errorf("failed to create final directory: %w", err) + } + + finalConfig := filepath.Join(finalDir, node.Hostname+".yaml") + + // Use talosctl machineconfig patch to merge base + patch + // talosctl machineconfig patch base.yaml --patch @patch.yaml -o final.yaml + cmd := exec.Command("talosctl", "machineconfig", "patch", baseConfig, + "--patch", "@"+patchFile, + "-o", finalConfig) + + output, err := cmd.CombinedOutput() + if err != nil { + return "", fmt.Errorf("failed to patch machine config: %w\nOutput: %s", err, string(output)) + } + + return finalConfig, nil +} + +// templatesExist checks if patch templates exist in the instance directory +func (m *Manager) templatesExist(templatesDir string) bool { + controlplaneTemplate := filepath.Join(templatesDir, "controlplane.yaml") + workerTemplate := filepath.Join(templatesDir, "worker.yaml") + + _, err1 := os.Stat(controlplaneTemplate) + _, err2 := os.Stat(workerTemplate) + + return err1 == nil && err2 == nil +} + +// copyTemplatesFromDirectory copies patch templates from directory/ to instance +func (m *Manager) copyTemplatesFromDirectory(destDir string) error { + // Find the directory/setup/cluster-nodes/patch.templates directory + // It should be in the same parent as the data directory + sourceDir := filepath.Join(filepath.Dir(m.dataDir), "directory", "setup", "cluster-nodes", "patch.templates") + + // Check if source directory exists + if _, err := os.Stat(sourceDir); err != nil { + return fmt.Errorf("source templates directory not found: %s", sourceDir) + } + + // Create destination directory + if err := os.MkdirAll(destDir, 0755); err != nil { + return fmt.Errorf("failed to create templates directory: %w", err) + } + + // Copy controlplane.yaml + if err := m.copyFile( + filepath.Join(sourceDir, "controlplane.yaml"), + filepath.Join(destDir, "controlplane.yaml"), + ); err != nil { + return fmt.Errorf("failed to copy controlplane template: %w", err) + } + + // Copy worker.yaml + if err := m.copyFile( + filepath.Join(sourceDir, "worker.yaml"), + filepath.Join(destDir, "worker.yaml"), + ); err != nil { + return fmt.Errorf("failed to copy worker template: %w", err) + } + + return nil +} + +// copyFile copies a file from src to dst +func (m *Manager) copyFile(src, dst string) error { + data, err := os.ReadFile(src) + if err != nil { + return err + } + + return os.WriteFile(dst, data, 0644) +} + +// updateNodeStatus updates node status flags in config.yaml +func (m *Manager) updateNodeStatus(instanceName string, node *Node) error { + instancePath := m.GetInstancePath(instanceName) + configPath := filepath.Join(instancePath, "config.yaml") + basePath := fmt.Sprintf("cluster.nodes.active.%s", node.Hostname) + + yq := tools.NewYQ() + + // Update maintenance flag + if node.Maintenance { + if err := yq.Set(configPath, basePath+".maintenance", "true"); err != nil { + return err + } + } else { + if err := yq.Set(configPath, basePath+".maintenance", "false"); err != nil { + return err + } + } + + // Update currentIP (may have changed after application) + if node.CurrentIP != "" { + if err := yq.Set(configPath, basePath+".currentIp", node.CurrentIP); err != nil { + return err + } + } + + // Update configured flag + if node.Configured { + if err := yq.Set(configPath, basePath+".configured", "true"); err != nil { + return err + } + } + + // Update applied flag + if node.Applied { + if err := yq.Set(configPath, 
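+	// Sketch of the intended consumer pattern (the loop body is illustrative;
+	// Subscribe and Unsubscribe are the methods defined in this file):
+	//
+	//   ch := b.Subscribe(opID)
+	//   defer b.Unsubscribe(opID, ch)
+	//   for data := range ch {
+	//       // write data to the SSE response and flush
+	//   }
+	//
+	// Unsubscribe closes the channel, which ends the range loop.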
basePath+".applied", "true"); err != nil { + return err + } + } + + return nil +} + +// Update modifies existing node configuration with partial updates +func (m *Manager) Update(instanceName string, hostname string, updates map[string]interface{}) error { + // Get existing node + node, err := m.Get(instanceName, hostname) + if err != nil { + return fmt.Errorf("node %s not found", hostname) + } + + instancePath := m.GetInstancePath(instanceName) + configPath := filepath.Join(instancePath, "config.yaml") + basePath := fmt.Sprintf("cluster.nodes.active.%s", hostname) + yq := tools.NewYQ() + + // Apply partial updates + for key, value := range updates { + switch key { + case "target_ip": + if strVal, ok := value.(string); ok { + node.TargetIP = strVal + if err := yq.Set(configPath, basePath+".targetIp", strVal); err != nil { + return fmt.Errorf("failed to update targetIp: %w", err) + } + } + case "current_ip": + if strVal, ok := value.(string); ok { + node.CurrentIP = strVal + node.Maintenance = true // Auto-set maintenance when currentIP changes + if err := yq.Set(configPath, basePath+".currentIp", strVal); err != nil { + return fmt.Errorf("failed to update currentIp: %w", err) + } + if err := yq.Set(configPath, basePath+".maintenance", "true"); err != nil { + return fmt.Errorf("failed to set maintenance: %w", err) + } + } + case "disk": + if strVal, ok := value.(string); ok { + node.Disk = strVal + if err := yq.Set(configPath, basePath+".disk", strVal); err != nil { + return fmt.Errorf("failed to update disk: %w", err) + } + } + case "interface": + if strVal, ok := value.(string); ok { + node.Interface = strVal + if err := yq.Set(configPath, basePath+".interface", strVal); err != nil { + return fmt.Errorf("failed to update interface: %w", err) + } + } + case "schematic_id": + if strVal, ok := value.(string); ok { + node.SchematicID = strVal + if err := yq.Set(configPath, basePath+".schematicId", strVal); err != nil { + return fmt.Errorf("failed to update schematicId: %w", err) + } + } + case "maintenance": + if boolVal, ok := value.(bool); ok { + node.Maintenance = boolVal + if err := yq.Set(configPath, basePath+".maintenance", fmt.Sprintf("%t", boolVal)); err != nil { + return fmt.Errorf("failed to update maintenance: %w", err) + } + } + } + } + + return nil +} + +// FetchTemplates copies patch templates from directory/ to instance +func (m *Manager) FetchTemplates(instanceName string) error { + instancePath := m.GetInstancePath(instanceName) + destDir := filepath.Join(instancePath, "setup", "cluster-nodes", "patch.templates") + return m.copyTemplatesFromDirectory(destDir) +} diff --git a/internal/operations/broadcaster.go b/internal/operations/broadcaster.go new file mode 100644 index 0000000..9736983 --- /dev/null +++ b/internal/operations/broadcaster.go @@ -0,0 +1,75 @@ +package operations + +import ( + "sync" +) + +// Broadcaster manages SSE clients subscribed to operation output +type Broadcaster struct { + clients map[string]map[chan []byte]bool // opID -> set of client channels + mu sync.RWMutex +} + +// NewBroadcaster creates a new broadcaster +func NewBroadcaster() *Broadcaster { + return &Broadcaster{ + clients: make(map[string]map[chan []byte]bool), + } +} + +// Subscribe creates a new channel for receiving operation output +func (b *Broadcaster) Subscribe(opID string) chan []byte { + b.mu.Lock() + defer b.mu.Unlock() + + ch := make(chan []byte, 100) // Buffered to prevent slow clients from blocking + if b.clients[opID] == nil { + b.clients[opID] = make(map[chan []byte]bool) + } 
+ b.clients[opID][ch] = true + return ch +} + +// Unsubscribe removes a client channel and closes it +func (b *Broadcaster) Unsubscribe(opID string, ch chan []byte) { + b.mu.Lock() + defer b.mu.Unlock() + + if clients, ok := b.clients[opID]; ok { + delete(clients, ch) + close(ch) + if len(clients) == 0 { + delete(b.clients, opID) + } + } +} + +// Publish sends data to all subscribed clients for an operation +func (b *Broadcaster) Publish(opID string, data []byte) { + b.mu.RLock() + defer b.mu.RUnlock() + + if clients, ok := b.clients[opID]; ok { + for ch := range clients { + select { + case ch <- data: + // Sent successfully + default: + // Channel buffer full, skip this message for this client + } + } + } +} + +// Close closes all client channels for an operation +func (b *Broadcaster) Close(opID string) { + b.mu.Lock() + defer b.mu.Unlock() + + if clients, ok := b.clients[opID]; ok { + for ch := range clients { + close(ch) + } + delete(b.clients, opID) + } +} diff --git a/internal/operations/operations.go b/internal/operations/operations.go new file mode 100644 index 0000000..fc67bab --- /dev/null +++ b/internal/operations/operations.go @@ -0,0 +1,255 @@ +package operations + +import ( + "encoding/json" + "fmt" + "os" + "path/filepath" + "time" + + "github.com/wild-cloud/wild-central/daemon/internal/storage" +) + +// Manager handles async operation tracking +type Manager struct { + dataDir string +} + +// NewManager creates a new operations manager +func NewManager(dataDir string) *Manager { + return &Manager{ + dataDir: dataDir, + } +} + +// Operation represents a long-running operation +type Operation struct { + ID string `json:"id"` + Type string `json:"type"` // discover, setup, download, bootstrap + Target string `json:"target"` + Instance string `json:"instance"` + Status string `json:"status"` // pending, running, completed, failed, cancelled + Message string `json:"message,omitempty"` + Progress int `json:"progress"` // 0-100 + LogFile string `json:"logFile,omitempty"` // Path to output log file + StartedAt time.Time `json:"started_at"` + EndedAt time.Time `json:"ended_at,omitempty"` +} + +// GetOperationsDir returns the operations directory for an instance +func (m *Manager) GetOperationsDir(instanceName string) string { + return filepath.Join(m.dataDir, "instances", instanceName, "operations") +} + +// generateID generates a unique operation ID +func generateID(opType, target string) string { + timestamp := time.Now().UnixNano() + return fmt.Sprintf("op_%s_%s_%d", opType, target, timestamp) +} + +// Start begins tracking a new operation +func (m *Manager) Start(instanceName, opType, target string) (string, error) { + opsDir := m.GetOperationsDir(instanceName) + + // Ensure operations directory exists + if err := storage.EnsureDir(opsDir, 0755); err != nil { + return "", err + } + + // Generate operation ID + opID := generateID(opType, target) + + // Create operation + op := &Operation{ + ID: opID, + Type: opType, + Target: target, + Instance: instanceName, + Status: "pending", + Progress: 0, + StartedAt: time.Now(), + } + + // Write operation file + if err := m.writeOperation(op); err != nil { + return "", err + } + + return opID, nil +} + +// Get returns operation status +func (m *Manager) Get(opID string) (*Operation, error) { + // Operation ID contains instance name, but we need to find it + // For now, we'll scan all instances (not ideal but simple) + // Better approach: encode instance in operation ID or maintain index + + // Simplified: assume operation ID format is 
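+	// List all recorded operations and delete the terminal ones (completed,
+	// failed, or cancelled) whose EndedAt precedes the cutoff. A caller might
+	// invoke this periodically, e.g. m.Cleanup(instanceName, 24*time.Hour)
+	// to keep roughly a day of history (the interval here is illustrative).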
op_{type}_{target}_{timestamp} + // We need to know which instance to look in + // For now, return error if we can't find it + + // This needs improvement in actual implementation + return nil, fmt.Errorf("operation lookup not implemented - need instance context") +} + +// GetByInstance returns an operation for a specific instance +func (m *Manager) GetByInstance(instanceName, opID string) (*Operation, error) { + opsDir := m.GetOperationsDir(instanceName) + opPath := filepath.Join(opsDir, opID+".json") + + if !storage.FileExists(opPath) { + return nil, fmt.Errorf("operation %s not found", opID) + } + + data, err := os.ReadFile(opPath) + if err != nil { + return nil, fmt.Errorf("failed to read operation: %w", err) + } + + var op Operation + if err := json.Unmarshal(data, &op); err != nil { + return nil, fmt.Errorf("failed to parse operation: %w", err) + } + + return &op, nil +} + +// Update modifies operation state +func (m *Manager) Update(instanceName, opID, status, message string, progress int) error { + op, err := m.GetByInstance(instanceName, opID) + if err != nil { + return err + } + + op.Status = status + op.Message = message + op.Progress = progress + + if status == "completed" || status == "failed" || status == "cancelled" { + op.EndedAt = time.Now() + } + + return m.writeOperation(op) +} + +// UpdateStatus updates only the status +func (m *Manager) UpdateStatus(instanceName, opID, status string) error { + op, err := m.GetByInstance(instanceName, opID) + if err != nil { + return err + } + + op.Status = status + + if status == "completed" || status == "failed" || status == "cancelled" { + op.EndedAt = time.Now() + } + + return m.writeOperation(op) +} + +// UpdateProgress updates operation progress +func (m *Manager) UpdateProgress(instanceName, opID string, progress int, message string) error { + op, err := m.GetByInstance(instanceName, opID) + if err != nil { + return err + } + + op.Progress = progress + if message != "" { + op.Message = message + } + + return m.writeOperation(op) +} + +// Cancel requests operation cancellation +func (m *Manager) Cancel(instanceName, opID string) error { + return m.UpdateStatus(instanceName, opID, "cancelled") +} + +// List returns all operations for an instance +func (m *Manager) List(instanceName string) ([]Operation, error) { + opsDir := m.GetOperationsDir(instanceName) + + // Ensure directory exists + if err := storage.EnsureDir(opsDir, 0755); err != nil { + return nil, err + } + + // Read all operation files + entries, err := os.ReadDir(opsDir) + if err != nil { + return nil, fmt.Errorf("failed to read operations directory: %w", err) + } + + operations := []Operation{} + for _, entry := range entries { + if entry.IsDir() || filepath.Ext(entry.Name()) != ".json" { + continue + } + + opPath := filepath.Join(opsDir, entry.Name()) + data, err := os.ReadFile(opPath) + if err != nil { + continue // Skip files we can't read + } + + var op Operation + if err := json.Unmarshal(data, &op); err != nil { + continue // Skip invalid JSON + } + + operations = append(operations, op) + } + + return operations, nil +} + +// Delete removes an operation record +func (m *Manager) Delete(instanceName, opID string) error { + opsDir := m.GetOperationsDir(instanceName) + opPath := filepath.Join(opsDir, opID+".json") + + if !storage.FileExists(opPath) { + return nil // Already deleted, idempotent + } + + return os.Remove(opPath) +} + +// Cleanup removes old completed/failed operations +func (m *Manager) Cleanup(instanceName string, olderThan time.Duration) error { + 
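// Retention sketch (hypothetical schedule, not part of this patch): a caller
// would typically run this periodically, e.g.
//
//	_ = mgr.Cleanup("my-cloud", 24*time.Hour) // drop finished ops older than a day
//
// Only terminal operations (completed/failed/cancelled) with a recorded
// EndedAt are removed; pending and running operations are never touched.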
ops, err := m.List(instanceName) + if err != nil { + return err + } + + cutoff := time.Now().Add(-olderThan) + + for _, op := range ops { + if (op.Status == "completed" || op.Status == "failed" || op.Status == "cancelled") && + !op.EndedAt.IsZero() && op.EndedAt.Before(cutoff) { + m.Delete(instanceName, op.ID) + } + } + + return nil +} + +// writeOperation writes operation to disk +func (m *Manager) writeOperation(op *Operation) error { + opsDir := m.GetOperationsDir(op.Instance) + opPath := filepath.Join(opsDir, op.ID+".json") + + data, err := json.MarshalIndent(op, "", " ") + if err != nil { + return fmt.Errorf("failed to marshal operation: %w", err) + } + + if err := storage.WriteFile(opPath, data, 0644); err != nil { + return fmt.Errorf("failed to write operation: %w", err) + } + + return nil +} diff --git a/internal/pxe/pxe.go b/internal/pxe/pxe.go new file mode 100644 index 0000000..4e9aa97 --- /dev/null +++ b/internal/pxe/pxe.go @@ -0,0 +1,220 @@ +package pxe + +import ( + "crypto/sha256" + "fmt" + "io" + "net/http" + "os" + "path/filepath" + + "github.com/wild-cloud/wild-central/daemon/internal/storage" +) + +// Manager handles PXE boot asset management +type Manager struct { + dataDir string +} + +// NewManager creates a new PXE manager +func NewManager(dataDir string) *Manager { + return &Manager{ + dataDir: dataDir, + } +} + +// Asset represents a PXE boot asset +type Asset struct { + Type string `json:"type"` // kernel, initramfs, iso + Version string `json:"version"` + Path string `json:"path"` + Size int64 `json:"size"` + SHA256 string `json:"sha256,omitempty"` + Downloaded bool `json:"downloaded"` +} + +// GetPXEDir returns the PXE directory for an instance +func (m *Manager) GetPXEDir(instanceName string) string { + return filepath.Join(m.dataDir, "instances", instanceName, "pxe") +} + +// ListAssets returns available PXE assets for an instance +func (m *Manager) ListAssets(instanceName string) ([]Asset, error) { + pxeDir := m.GetPXEDir(instanceName) + + // Ensure PXE directory exists + if err := storage.EnsureDir(pxeDir, 0755); err != nil { + return nil, err + } + + assets := []Asset{} + + // Check for common assets + assetTypes := []struct { + name string + path string + }{ + {"kernel", "kernel"}, + {"initramfs", "initramfs.xz"}, + {"iso", "talos.iso"}, + } + + for _, at := range assetTypes { + assetPath := filepath.Join(pxeDir, at.path) + info, err := os.Stat(assetPath) + + asset := Asset{ + Type: at.name, + Path: assetPath, + Downloaded: err == nil, + } + + if err == nil { + asset.Size = info.Size() + // Calculate SHA256 if file exists + if hash, err := calculateSHA256(assetPath); err == nil { + asset.SHA256 = hash + } + } + + assets = append(assets, asset) + } + + return assets, nil +} + +// DownloadAsset downloads a PXE asset +func (m *Manager) DownloadAsset(instanceName, assetType, version, url string) error { + pxeDir := m.GetPXEDir(instanceName) + + // Ensure PXE directory exists + if err := storage.EnsureDir(pxeDir, 0755); err != nil { + return err + } + + // Determine filename based on asset type + var filename string + switch assetType { + case "kernel": + filename = "kernel" + case "initramfs": + filename = "initramfs.xz" + case "iso": + filename = "talos.iso" + default: + return fmt.Errorf("unknown asset type: %s", assetType) + } + + assetPath := filepath.Join(pxeDir, filename) + + // Check if asset already exists (idempotency) + if storage.FileExists(assetPath) { + return nil // Already downloaded + } + + // Download file + resp, err := http.Get(url) + if err 
!= nil { + return fmt.Errorf("failed to download %s: %w", url, err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("failed to download %s: status %d", url, resp.StatusCode) + } + + // Create temporary file + tmpFile := assetPath + ".tmp" + out, err := os.Create(tmpFile) + if err != nil { + return fmt.Errorf("failed to create file: %w", err) + } + defer out.Close() + + // Copy data + _, err = io.Copy(out, resp.Body) + if err != nil { + os.Remove(tmpFile) + return fmt.Errorf("failed to write file: %w", err) + } + + // Move to final location + if err := os.Rename(tmpFile, assetPath); err != nil { + os.Remove(tmpFile) + return fmt.Errorf("failed to move file: %w", err) + } + + return nil +} + +// GetAssetPath returns the local path for an asset +func (m *Manager) GetAssetPath(instanceName, assetType string) (string, error) { + pxeDir := m.GetPXEDir(instanceName) + + var filename string + switch assetType { + case "kernel": + filename = "kernel" + case "initramfs": + filename = "initramfs.xz" + case "iso": + filename = "talos.iso" + default: + return "", fmt.Errorf("unknown asset type: %s", assetType) + } + + assetPath := filepath.Join(pxeDir, filename) + + if !storage.FileExists(assetPath) { + return "", fmt.Errorf("asset %s not found", assetType) + } + + return assetPath, nil +} + +// VerifyAsset checks if an asset exists and is valid +func (m *Manager) VerifyAsset(instanceName, assetType string) (bool, error) { + assetPath, err := m.GetAssetPath(instanceName, assetType) + if err != nil { + return false, nil // Asset doesn't exist, but that's not an error for verification + } + + // Check if file is readable + info, err := os.Stat(assetPath) + if err != nil { + return false, err + } + + // Check if file has size + if info.Size() == 0 { + return false, fmt.Errorf("asset %s is empty", assetType) + } + + return true, nil +} + +// DeleteAsset removes an asset +func (m *Manager) DeleteAsset(instanceName, assetType string) error { + assetPath, err := m.GetAssetPath(instanceName, assetType) + if err != nil { + return nil // Asset doesn't exist, idempotent + } + + return os.Remove(assetPath) +} + +// calculateSHA256 computes the SHA256 hash of a file +func calculateSHA256(filePath string) (string, error) { + file, err := os.Open(filePath) + if err != nil { + return "", err + } + defer file.Close() + + hash := sha256.New() + if _, err := io.Copy(hash, file); err != nil { + return "", err + } + + return fmt.Sprintf("%x", hash.Sum(nil)), nil +} diff --git a/internal/secrets/secrets.go b/internal/secrets/secrets.go new file mode 100644 index 0000000..9737ecd --- /dev/null +++ b/internal/secrets/secrets.go @@ -0,0 +1,166 @@ +package secrets + +import ( + "crypto/rand" + "fmt" + "math/big" + "path/filepath" + + "github.com/wild-cloud/wild-central/daemon/internal/storage" + "github.com/wild-cloud/wild-central/daemon/internal/tools" +) + +const ( + // DefaultSecretLength is 32 characters + DefaultSecretLength = 32 + // Alphanumeric characters for secret generation + alphanumeric = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" +) + +// Manager handles secret generation and storage +type Manager struct { + yq *tools.YQ +} + +// NewManager creates a new secrets manager +func NewManager() *Manager { + return &Manager{ + yq: tools.NewYQ(), + } +} + +// GenerateSecret generates a cryptographically secure random alphanumeric string +func GenerateSecret(length int) (string, error) { + if length <= 0 { + length = DefaultSecretLength + } + + result := 
make([]byte, length) + for i := range result { + num, err := rand.Int(rand.Reader, big.NewInt(int64(len(alphanumeric)))) + if err != nil { + return "", fmt.Errorf("generating random number: %w", err) + } + result[i] = alphanumeric[num.Int64()] + } + + return string(result), nil +} + +// EnsureSecretsFile ensures a secrets file exists with proper structure and permissions +func (m *Manager) EnsureSecretsFile(instancePath string) error { + secretsPath := filepath.Join(instancePath, "secrets.yaml") + + // Check if secrets file already exists + if storage.FileExists(secretsPath) { + // Ensure proper permissions + if err := storage.EnsureFilePermissions(secretsPath, 0600); err != nil { + return err + } + return nil + } + + // Create minimal secrets structure + initialSecrets := `# Wild Cloud Instance Secrets +# WARNING: This file contains sensitive data. Keep secure! +cluster: + talosSecrets: "" + kubeconfig: "" +certManager: + cloudflare: + apiToken: "" +` + + // Ensure instance directory exists + if err := storage.EnsureDir(instancePath, 0755); err != nil { + return err + } + + // Write secrets file with restrictive permissions (0600) + if err := storage.WriteFile(secretsPath, []byte(initialSecrets), 0600); err != nil { + return err + } + + return nil +} + +// GetSecret retrieves a secret value from a secrets file +func (m *Manager) GetSecret(secretsPath, key string) (string, error) { + if !storage.FileExists(secretsPath) { + return "", fmt.Errorf("secrets file not found: %s", secretsPath) + } + + value, err := m.yq.Get(secretsPath, fmt.Sprintf(".%s", key)) + if err != nil { + return "", fmt.Errorf("getting secret %s: %w", key, err) + } + + return value, nil +} + +// SetSecret sets a secret value in a secrets file +func (m *Manager) SetSecret(secretsPath, key, value string) error { + if !storage.FileExists(secretsPath) { + return fmt.Errorf("secrets file not found: %s", secretsPath) + } + + // Acquire lock before modifying + lockPath := secretsPath + ".lock" + return storage.WithLock(lockPath, func() error { + // Don't wrap value in quotes - yq handles YAML quoting automatically + if err := m.yq.Set(secretsPath, fmt.Sprintf(".%s", key), value); err != nil { + return err + } + // Ensure permissions remain secure after modification + return storage.EnsureFilePermissions(secretsPath, 0600) + }) +} + +// EnsureSecret generates and sets a secret only if it doesn't exist (idempotent) +func (m *Manager) EnsureSecret(secretsPath, key string, length int) (string, error) { + if !storage.FileExists(secretsPath) { + return "", fmt.Errorf("secrets file not found: %s", secretsPath) + } + + // Check if secret already exists + existingSecret, err := m.GetSecret(secretsPath, key) + if err == nil && existingSecret != "" && existingSecret != "null" { + // Secret already exists, return it + return existingSecret, nil + } + + // Generate new secret + secret, err := GenerateSecret(length) + if err != nil { + return "", err + } + + // Set the secret + if err := m.SetSecret(secretsPath, key, secret); err != nil { + return "", err + } + + return secret, nil +} + +// GenerateAndStoreSecret is a convenience function that generates a secret and stores it +func (m *Manager) GenerateAndStoreSecret(secretsPath, key string) (string, error) { + return m.EnsureSecret(secretsPath, key, DefaultSecretLength) +} + +// DeleteSecret removes a secret from a secrets file +func (m *Manager) DeleteSecret(secretsPath, key string) error { + if !storage.FileExists(secretsPath) { + return fmt.Errorf("secrets file not found: %s", 
secretsPath) + } + + // Acquire lock before modifying + lockPath := secretsPath + ".lock" + return storage.WithLock(lockPath, func() error { + if err := m.yq.Delete(secretsPath, fmt.Sprintf(".%s", key)); err != nil { + return err + } + // Ensure permissions remain secure after modification + return storage.EnsureFilePermissions(secretsPath, 0600) + }) +} diff --git a/internal/secrets/secrets_test.go b/internal/secrets/secrets_test.go new file mode 100644 index 0000000..572b75b --- /dev/null +++ b/internal/secrets/secrets_test.go @@ -0,0 +1,121 @@ +package secrets + +import ( + "os" + "path/filepath" + "testing" +) + +func TestGenerateSecret(t *testing.T) { + // Test various lengths + lengths := []int{32, 64, 128} + for _, length := range lengths { + secret, err := GenerateSecret(length) + if err != nil { + t.Fatalf("GenerateSecret(%d) failed: %v", length, err) + } + + if len(secret) != length { + t.Errorf("Expected length %d, got %d", length, len(secret)) + } + + // Verify only alphanumeric characters + for _, c := range secret { + if !((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9')) { + t.Errorf("Non-alphanumeric character found: %c", c) + } + } + } + + // Test that secrets are different (not deterministic) + secret1, _ := GenerateSecret(32) + secret2, _ := GenerateSecret(32) + if secret1 == secret2 { + t.Errorf("Generated secrets should be different") + } +} + +func TestManager_EnsureSecretsFile(t *testing.T) { + tmpDir := t.TempDir() + m := NewManager() + + instancePath := filepath.Join(tmpDir, "test-cloud") + err := os.MkdirAll(instancePath, 0755) + if err != nil { + t.Fatalf("Failed to create instance dir: %v", err) + } + + // Ensure secrets + err = m.EnsureSecretsFile(instancePath) + if err != nil { + t.Fatalf("EnsureSecretsFile failed: %v", err) + } + + secretsPath := filepath.Join(instancePath, "secrets.yaml") + + // Verify file exists + info, err := os.Stat(secretsPath) + if err != nil { + t.Fatalf("Secrets file not created: %v", err) + } + + // Verify permissions are 0600 + mode := info.Mode().Perm() + if mode != 0600 { + t.Errorf("Wrong permissions: got %o, want 0600", mode) + } + + // Test idempotency - calling again should not error + err = m.EnsureSecretsFile(instancePath) + if err != nil { + t.Fatalf("EnsureSecretsFile not idempotent: %v", err) + } +} + +func TestManager_SetAndGetSecret(t *testing.T) { + tmpDir := t.TempDir() + m := NewManager() + + instancePath := filepath.Join(tmpDir, "test-cloud") + err := os.MkdirAll(instancePath, 0755) + if err != nil { + t.Fatalf("Failed to create instance dir: %v", err) + } + + secretsPath := filepath.Join(instancePath, "secrets.yaml") + + // Initialize secrets + err = m.EnsureSecretsFile(instancePath) + if err != nil { + t.Fatalf("EnsureSecretsFile failed: %v", err) + } + + // Set a custom secret (requires yq) + err = m.SetSecret(secretsPath, "customSecret", "myvalue123") + if err != nil { + t.Skipf("SetSecret requires yq: %v", err) + return + } + + // Get the secret back + value, err := m.GetSecret(secretsPath, "customSecret") + if err != nil { + t.Fatalf("GetSecret failed: %v", err) + } + + if value != "myvalue123" { + t.Errorf("Secret not retrieved correctly: got %q, want %q", value, "myvalue123") + } + + // Verify permissions still 0600 + info, _ := os.Stat(secretsPath) + if info.Mode().Perm() != 0600 { + t.Errorf("Permissions changed after SetSecret") + } + + // Get non-existent secret should error + _, err = m.GetSecret(secretsPath, "nonExistent") + if err == nil { + t.Fatalf("GetSecret should fail 
for non-existent secret") + } +} diff --git a/internal/services/broadcast_writer.go b/internal/services/broadcast_writer.go new file mode 100644 index 0000000..6735b17 --- /dev/null +++ b/internal/services/broadcast_writer.go @@ -0,0 +1,66 @@ +package services + +import ( + "bytes" + "os" + + "github.com/wild-cloud/wild-central/daemon/internal/operations" +) + +// broadcastWriter writes output to both a file and broadcasts to SSE clients +type broadcastWriter struct { + file *os.File + broadcaster *operations.Broadcaster + opID string + buffer *bytes.Buffer +} + +// newBroadcastWriter creates a writer that writes to file and broadcasts +func newBroadcastWriter(file *os.File, broadcaster *operations.Broadcaster, opID string) *broadcastWriter { + return &broadcastWriter{ + file: file, + broadcaster: broadcaster, + opID: opID, + buffer: &bytes.Buffer{}, + } +} + +// Write implements io.Writer interface +func (w *broadcastWriter) Write(p []byte) (n int, err error) { + // Write to file first + n, err = w.file.Write(p) + if err != nil { + return n, err + } + + // Buffer the data and broadcast complete lines + if w.broadcaster != nil { + w.buffer.Write(p) + + // Extract and broadcast complete lines + for { + line, err := w.buffer.ReadBytes('\n') + if err != nil { + // No complete line, put back what we read and break + w.buffer.Write(line) + break + } + // Broadcast the line without the trailing newline + if len(line) > 0 && line[len(line)-1] == '\n' { + line = line[:len(line)-1] + } + w.broadcaster.Publish(w.opID, line) + } + } + + return n, nil +} + +// Flush broadcasts any remaining buffered data +func (w *broadcastWriter) Flush() { + if w.broadcaster != nil && w.buffer.Len() > 0 { + // Broadcast the remaining incomplete line + w.broadcaster.Publish(w.opID, w.buffer.Bytes()) + w.buffer.Reset() + } +} diff --git a/internal/services/manifest.go b/internal/services/manifest.go new file mode 100644 index 0000000..88cb2ec --- /dev/null +++ b/internal/services/manifest.go @@ -0,0 +1,122 @@ +package services + +import ( + "fmt" + "os" + "path/filepath" + + "gopkg.in/yaml.v3" + + "github.com/wild-cloud/wild-central/daemon/internal/storage" +) + +// ServiceManifest defines a service deployment configuration +// Matches the simple app manifest pattern +type ServiceManifest struct { + Name string `yaml:"name" json:"name"` + Description string `yaml:"description" json:"description"` + Namespace string `yaml:"namespace" json:"namespace"` + Category string `yaml:"category,omitempty" json:"category,omitempty"` + Dependencies []string `yaml:"dependencies,omitempty" json:"dependencies,omitempty"` + ConfigReferences []string `yaml:"configReferences,omitempty" json:"configReferences,omitempty"` + ServiceConfig map[string]ConfigDefinition `yaml:"serviceConfig,omitempty" json:"serviceConfig,omitempty"` +} + +// ConfigDefinition defines config that should be prompted during service setup +type ConfigDefinition struct { + Path string `yaml:"path" json:"path"` // Config path to set + Prompt string `yaml:"prompt" json:"prompt"` // User prompt text + Default string `yaml:"default" json:"default"` // Default value (supports templates) + Type string `yaml:"type,omitempty" json:"type,omitempty"` // Value type: string|int|bool (default: string) +} + +// LoadManifest reads and parses a service manifest from a service directory +func LoadManifest(serviceDir string) (*ServiceManifest, error) { + manifestPath := filepath.Join(serviceDir, "wild-manifest.yaml") + + if !storage.FileExists(manifestPath) { + return nil, 
fmt.Errorf("manifest not found: %s", manifestPath) + } + + data, err := os.ReadFile(manifestPath) + if err != nil { + return nil, fmt.Errorf("failed to read manifest: %w", err) + } + + var manifest ServiceManifest + if err := yaml.Unmarshal(data, &manifest); err != nil { + return nil, fmt.Errorf("failed to parse manifest: %w", err) + } + + // Validate required fields + if manifest.Name == "" { + return nil, fmt.Errorf("manifest missing name") + } + if manifest.Namespace == "" { + return nil, fmt.Errorf("manifest missing namespace") + } + + return &manifest, nil +} + +// LoadAllManifests loads manifests for all services in a directory +func LoadAllManifests(servicesDir string) (map[string]*ServiceManifest, error) { + manifests := make(map[string]*ServiceManifest) + + entries, err := os.ReadDir(servicesDir) + if err != nil { + return nil, fmt.Errorf("failed to read services directory: %w", err) + } + + for _, entry := range entries { + if !entry.IsDir() { + continue + } + + serviceDir := filepath.Join(servicesDir, entry.Name()) + manifest, err := LoadManifest(serviceDir) + if err != nil { + // Skip services without manifests (they might not be migrated yet) + continue + } + + manifests[manifest.Name] = manifest + } + + return manifests, nil +} + +// GetDeploymentName returns the primary deployment name for this service +// Uses name as the deployment name by default +func (m *ServiceManifest) GetDeploymentName() string { + // For now, assume deployment name matches service name + // This can be made configurable if needed + return m.Name +} + +// GetRequiredConfig returns all config paths that must be set +func (m *ServiceManifest) GetRequiredConfig() []string { + var required []string + + // Add all service config paths (these will be prompted) + for _, cfg := range m.ServiceConfig { + required = append(required, cfg.Path) + } + + return required +} + +// GetAllConfigPaths returns all config paths (references + service config) +func (m *ServiceManifest) GetAllConfigPaths() []string { + var paths []string + + // Config references (must already exist) + paths = append(paths, m.ConfigReferences...) 
+ + // Service config (will be prompted) + for _, cfg := range m.ServiceConfig { + paths = append(paths, cfg.Path) + } + + return paths +} diff --git a/internal/services/services.go b/internal/services/services.go new file mode 100644 index 0000000..8a606b8 --- /dev/null +++ b/internal/services/services.go @@ -0,0 +1,631 @@ +package services + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + + "gopkg.in/yaml.v3" + + "github.com/wild-cloud/wild-central/daemon/internal/operations" + "github.com/wild-cloud/wild-central/daemon/internal/storage" + "github.com/wild-cloud/wild-central/daemon/internal/tools" +) + +// Manager handles base service operations +type Manager struct { + dataDir string + servicesDir string // Path to services directory + manifests map[string]*ServiceManifest // Cached service manifests +} + +// NewManager creates a new services manager +func NewManager(dataDir, servicesDir string) *Manager { + m := &Manager{ + dataDir: dataDir, + servicesDir: servicesDir, + } + + // Load all service manifests + manifests, err := LoadAllManifests(servicesDir) + if err != nil { + // Log error but continue - services without manifests will fall back to hardcoded map + fmt.Printf("Warning: failed to load service manifests: %v\n", err) + manifests = make(map[string]*ServiceManifest) + } + m.manifests = manifests + + return m +} + +// Service represents a base service +type Service struct { + Name string `json:"name"` + Description string `json:"description"` + Status string `json:"status"` + Version string `json:"version"` + Namespace string `json:"namespace"` + Dependencies []string `json:"dependencies,omitempty"` +} + +// Base services in Wild Cloud (kept for reference/validation) +var BaseServices = []string{ + "metallb", // Load balancer + "traefik", // Ingress controller + "cert-manager", // Certificate management + "longhorn", // Storage +} + +// serviceDeployments maps service directory names to their actual namespace and deployment name +var serviceDeployments = map[string]struct { + namespace string + deploymentName string +}{ + "cert-manager": {"cert-manager", "cert-manager"}, + "coredns": {"kube-system", "coredns"}, + "docker-registry": {"docker-registry", "docker-registry"}, + "externaldns": {"externaldns", "external-dns"}, + "kubernetes-dashboard": {"kubernetes-dashboard", "kubernetes-dashboard"}, + "longhorn": {"longhorn-system", "longhorn-ui"}, + "metallb": {"metallb-system", "controller"}, + "nfs": {"nfs-system", "nfs-server"}, + "node-feature-discovery": {"node-feature-discovery", "node-feature-discovery-master"}, + "nvidia-device-plugin": {"nvidia-device-plugin", "nvidia-device-plugin-daemonset"}, + "smtp": {"smtp-system", "smtp"}, + "traefik": {"traefik", "traefik"}, + "utils": {"utils-system", "utils"}, +} + +// checkServiceStatus checks if a service is deployed +func (m *Manager) checkServiceStatus(instanceName, serviceName string) string { + kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) + + // If kubeconfig doesn't exist, cluster isn't bootstrapped + if !storage.FileExists(kubeconfigPath) { + return "not-deployed" + } + + kubectl := tools.NewKubectl(kubeconfigPath) + + // Special case: NFS doesn't have a deployment, check for StorageClass instead + if serviceName == "nfs" { + cmd := exec.Command("kubectl", "--kubeconfig", kubeconfigPath, "get", "storageclass", "nfs", "-o", "name") + if err := cmd.Run(); err == nil { + return "deployed" + } + return "not-deployed" + } + + var namespace, deploymentName string + + // Check hardcoded 
map first for deployment name (has correct names) + if deployment, ok := serviceDeployments[serviceName]; ok { + namespace = deployment.namespace + deploymentName = deployment.deploymentName + } else if manifest, ok := m.manifests[serviceName]; ok { + // Fall back to manifest if not in hardcoded map + namespace = manifest.Namespace + deploymentName = manifest.GetDeploymentName() + } else { + // Service not found anywhere, assume not deployed + return "not-deployed" + } + + if kubectl.DeploymentExists(deploymentName, namespace) { + return "deployed" + } + + return "not-deployed" +} + +// List returns all base services and their status +func (m *Manager) List(instanceName string) ([]Service, error) { + services := []Service{} + + // Discover services from the services directory + entries, err := os.ReadDir(m.servicesDir) + if err != nil { + return nil, fmt.Errorf("failed to read services directory: %w", err) + } + + for _, entry := range entries { + if !entry.IsDir() { + continue // Skip non-directories like README.md + } + + name := entry.Name() + + // Get service info from manifest if available + var namespace, description, version string + var dependencies []string + + if manifest, ok := m.manifests[name]; ok { + namespace = manifest.Namespace + description = manifest.Description + version = manifest.Category // Using category as version for now + dependencies = manifest.Dependencies + } else { + // Fall back to hardcoded map + namespace = name + "-system" // default + if deployment, ok := serviceDeployments[name]; ok { + namespace = deployment.namespace + } + } + + service := Service{ + Name: name, + Status: m.checkServiceStatus(instanceName, name), + Namespace: namespace, + Description: description, + Version: version, + Dependencies: dependencies, + } + + services = append(services, service) + } + + return services, nil +} + +// Get returns a specific service +func (m *Manager) Get(instanceName, serviceName string) (*Service, error) { + // Get the correct namespace from the map + namespace := serviceName + "-system" // default + if deployment, ok := serviceDeployments[serviceName]; ok { + namespace = deployment.namespace + } + + service := &Service{ + Name: serviceName, + Status: m.checkServiceStatus(instanceName, serviceName), + Namespace: namespace, + } + + return service, nil +} + +// Install orchestrates the complete service installation lifecycle +func (m *Manager) Install(instanceName, serviceName string, fetch, deploy bool, opID string, broadcaster *operations.Broadcaster) error { + // Phase 1: Fetch (if requested or files don't exist) + if fetch || !m.serviceFilesExist(instanceName, serviceName) { + if err := m.Fetch(instanceName, serviceName); err != nil { + return fmt.Errorf("fetch failed: %w", err) + } + } + + // Phase 2: Validate Configuration + // Configuration happens via API before calling install + // Validate all required config is set + if err := m.validateConfig(instanceName, serviceName); err != nil { + return fmt.Errorf("configuration incomplete: %w", err) + } + + // Phase 3: Compile templates + if err := m.Compile(instanceName, serviceName); err != nil { + return fmt.Errorf("template compilation failed: %w", err) + } + + // Phase 4: Deploy (if requested) + if deploy { + if err := m.Deploy(instanceName, serviceName, opID, broadcaster); err != nil { + return fmt.Errorf("deployment failed: %w", err) + } + } + + return nil +} + +// InstallAll installs all base services +func (m *Manager) InstallAll(instanceName string, fetch, deploy bool, opID string, broadcaster 
*operations.Broadcaster) error { + for _, serviceName := range BaseServices { + if err := m.Install(instanceName, serviceName, fetch, deploy, opID, broadcaster); err != nil { + return fmt.Errorf("failed to install %s: %w", serviceName, err) + } + } + + return nil +} + +// Delete removes a service +func (m *Manager) Delete(instanceName, serviceName string) error { + kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) + + serviceDir := filepath.Join(m.servicesDir, serviceName) + manifestsFile := filepath.Join(serviceDir, "manifests.yaml") + + if !storage.FileExists(manifestsFile) { + return fmt.Errorf("service %s not found", serviceName) + } + + cmd := exec.Command("kubectl", "delete", "-f", manifestsFile) + tools.WithKubeconfig(cmd, kubeconfigPath) + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("failed to delete service: %w\nOutput: %s", err, string(output)) + } + + return nil +} + +// GetStatus returns detailed status for a service +func (m *Manager) GetStatus(instanceName, serviceName string) (*Service, error) { + // Get the correct namespace from the map + namespace := serviceName + "-system" // default + if deployment, ok := serviceDeployments[serviceName]; ok { + namespace = deployment.namespace + } + + service := &Service{ + Name: serviceName, + Namespace: namespace, + Status: m.checkServiceStatus(instanceName, serviceName), + } + + return service, nil +} + +// GetManifest returns the manifest for a service +func (m *Manager) GetManifest(serviceName string) (*ServiceManifest, error) { + if manifest, ok := m.manifests[serviceName]; ok { + return manifest, nil + } + return nil, fmt.Errorf("service %s not found or has no manifest", serviceName) +} + +// GetServiceConfig returns the service configuration fields from the manifest +func (m *Manager) GetServiceConfig(serviceName string) (map[string]ConfigDefinition, error) { + manifest, err := m.GetManifest(serviceName) + if err != nil { + return nil, err + } + return manifest.ServiceConfig, nil +} + +// GetConfigReferences returns the config references from the manifest +func (m *Manager) GetConfigReferences(serviceName string) ([]string, error) { + manifest, err := m.GetManifest(serviceName) + if err != nil { + return nil, err + } + return manifest.ConfigReferences, nil +} + +// Fetch copies service files from directory to instance +func (m *Manager) Fetch(instanceName, serviceName string) error { + // 1. Validate service exists in directory + sourceDir := filepath.Join(m.servicesDir, serviceName) + if !dirExists(sourceDir) { + return fmt.Errorf("service %s not found in directory", serviceName) + } + + // 2. Create instance service directory + instanceDir := filepath.Join(m.dataDir, "instances", instanceName, + "setup", "cluster-services", serviceName) + if err := os.MkdirAll(instanceDir, 0755); err != nil { + return fmt.Errorf("failed to create service directory: %w", err) + } + + // 3. 
Copy files: + // - README.md (if exists, optional) + // - install.sh (if exists, optional) + // - kustomize.template/* (if exists, optional) + + // Copy README.md + copyFileIfExists(filepath.Join(sourceDir, "README.md"), + filepath.Join(instanceDir, "README.md")) + + // Copy install.sh (optional) + installSh := filepath.Join(sourceDir, "install.sh") + if fileExists(installSh) { + if err := copyFile(installSh, filepath.Join(instanceDir, "install.sh")); err != nil { + return fmt.Errorf("failed to copy install.sh: %w", err) + } + // Make install.sh executable + os.Chmod(filepath.Join(instanceDir, "install.sh"), 0755) + } + + // Copy kustomize.template directory if it exists + templateDir := filepath.Join(sourceDir, "kustomize.template") + if dirExists(templateDir) { + destTemplateDir := filepath.Join(instanceDir, "kustomize.template") + if err := copyDir(templateDir, destTemplateDir); err != nil { + return fmt.Errorf("failed to copy templates: %w", err) + } + } + + return nil +} + +// serviceFilesExist checks if service files exist in the instance +func (m *Manager) serviceFilesExist(instanceName, serviceName string) bool { + serviceDir := filepath.Join(m.dataDir, "instances", instanceName, + "setup", "cluster-services", serviceName) + installSh := filepath.Join(serviceDir, "install.sh") + return fileExists(installSh) +} + +// Helper functions for file operations + +func fileExists(path string) bool { + _, err := os.Stat(path) + return err == nil +} + +func dirExists(path string) bool { + info, err := os.Stat(path) + return err == nil && info.IsDir() +} + +func copyFile(src, dst string) error { + input, err := os.ReadFile(src) + if err != nil { + return err + } + return os.WriteFile(dst, input, 0644) +} + +func copyFileIfExists(src, dst string) error { + if !fileExists(src) { + return nil + } + return copyFile(src, dst) +} + +func copyDir(src, dst string) error { + // Create destination directory + if err := os.MkdirAll(dst, 0755); err != nil { + return err + } + + // Read source directory + entries, err := os.ReadDir(src) + if err != nil { + return err + } + + // Copy each entry + for _, entry := range entries { + srcPath := filepath.Join(src, entry.Name()) + dstPath := filepath.Join(dst, entry.Name()) + + if entry.IsDir() { + if err := copyDir(srcPath, dstPath); err != nil { + return err + } + } else { + if err := copyFile(srcPath, dstPath); err != nil { + return err + } + } + } + + return nil +} + +// Compile processes gomplate templates into final Kubernetes manifests +func (m *Manager) Compile(instanceName, serviceName string) error { + instanceDir := filepath.Join(m.dataDir, "instances", instanceName) + serviceDir := filepath.Join(instanceDir, "setup", "cluster-services", serviceName) + templateDir := filepath.Join(serviceDir, "kustomize.template") + outputDir := filepath.Join(serviceDir, "kustomize") + + // 1. Check if templates exist + if !dirExists(templateDir) { + // No templates to compile - this is OK for some services + return nil + } + + // 2. Load config and secrets files + configFile := filepath.Join(instanceDir, "config.yaml") + secretsFile := filepath.Join(instanceDir, "secrets.yaml") + + if !fileExists(configFile) { + return fmt.Errorf("config.yaml not found for instance %s", instanceName) + } + + // 3. Create output directory + if err := os.MkdirAll(outputDir, 0755); err != nil { + return fmt.Errorf("failed to create output directory: %w", err) + } + + // 4. 
Process templates with gomplate + // Build gomplate command + gomplateArgs := []string{ + "-c", fmt.Sprintf(".=%s", configFile), + } + + // Add secrets context if file exists + if fileExists(secretsFile) { + gomplateArgs = append(gomplateArgs, "-c", fmt.Sprintf("secrets=%s", secretsFile)) + } + + // Process each template file recursively + err := filepath.Walk(templateDir, func(srcPath string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Skip directories + if info.IsDir() { + return nil + } + + // Calculate relative path and destination + relPath, _ := filepath.Rel(templateDir, srcPath) + dstPath := filepath.Join(outputDir, relPath) + + // Create destination directory + if err := os.MkdirAll(filepath.Dir(dstPath), 0755); err != nil { + return err + } + + // Run gomplate on this file + args := append(gomplateArgs, "-f", srcPath, "-o", dstPath) + cmd := exec.Command("gomplate", args...) + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("gomplate failed for %s: %w\nOutput: %s", relPath, err, output) + } + + return nil + }) + + if err != nil { + return fmt.Errorf("template compilation failed: %w", err) + } + + return nil +} + +// Deploy executes the service-specific install.sh script +// opID and broadcaster are optional - if provided, output will be streamed to SSE clients +func (m *Manager) Deploy(instanceName, serviceName, opID string, broadcaster *operations.Broadcaster) error { + fmt.Printf("[DEBUG] Deploy() called for service=%s instance=%s opID=%s\n", serviceName, instanceName, opID) + + instanceDir := filepath.Join(m.dataDir, "instances", instanceName) + serviceDir := filepath.Join(instanceDir, "setup", "cluster-services", serviceName) + installScript := filepath.Join(serviceDir, "install.sh") + + // 1. Check if install.sh exists + if !fileExists(installScript) { + // No install.sh means nothing to deploy - this is valid for documentation-only services + msg := fmt.Sprintf("ℹ️ Service %s has no install.sh - nothing to deploy\n", serviceName) + if broadcaster != nil && opID != "" { + broadcaster.Publish(opID, []byte(msg)) + } + return nil + } + fmt.Printf("[DEBUG] Found install script: %s\n", installScript) + + // 2. Set up environment + kubeconfigPath := tools.GetKubeconfigPath(m.dataDir, instanceName) + if !fileExists(kubeconfigPath) { + return fmt.Errorf("kubeconfig not found - cluster may not be bootstrapped") + } + fmt.Printf("[DEBUG] Using kubeconfig: %s\n", kubeconfigPath) + + // Build environment - append to existing environment + // This ensures kubectl and other tools are available + env := os.Environ() + env = append(env, + fmt.Sprintf("WILD_INSTANCE=%s", instanceName), + fmt.Sprintf("WILD_CENTRAL_DATA=%s", m.dataDir), + fmt.Sprintf("KUBECONFIG=%s", kubeconfigPath), + ) + fmt.Printf("[DEBUG] Environment configured: WILD_INSTANCE=%s, KUBECONFIG=%s\n", instanceName, kubeconfigPath) + + // 3.
Set up output streaming + var outputWriter *broadcastWriter + if opID != "" { + // Create log directory + logDir := filepath.Join(instanceDir, "operations", opID) + if err := os.MkdirAll(logDir, 0755); err != nil { + return fmt.Errorf("failed to create log directory: %w", err) + } + + // Create log file + logFile, err := os.Create(filepath.Join(logDir, "output.log")) + if err != nil { + return fmt.Errorf("failed to create log file: %w", err) + } + defer logFile.Close() + + // Create broadcast writer + outputWriter = newBroadcastWriter(logFile, broadcaster, opID) + + // Send initial heartbeat message to SSE stream + if broadcaster != nil { + initialMsg := fmt.Sprintf("🚀 Starting deployment of %s...\n", serviceName) + broadcaster.Publish(opID, []byte(initialMsg)) + fmt.Printf("[DEBUG] Sent initial SSE message for opID=%s\n", opID) + } + } + + // 4. Execute install.sh + fmt.Printf("[DEBUG] Executing: /bin/bash %s\n", installScript) + cmd := exec.Command("/bin/bash", installScript) + cmd.Dir = serviceDir + cmd.Env = env + + if outputWriter != nil { + // Stream output to file and SSE clients + cmd.Stdout = outputWriter + cmd.Stderr = outputWriter + fmt.Printf("[DEBUG] Starting command execution for opID=%s\n", opID) + err := cmd.Run() + fmt.Printf("[DEBUG] Command completed for opID=%s, err=%v\n", opID, err) + if broadcaster != nil { + outputWriter.Flush() // Flush any remaining buffered data + broadcaster.Close(opID) // Close all SSE clients + } + return err + } else { + // Fallback: capture output for logging (backward compatibility) + output, err := cmd.CombinedOutput() + fmt.Printf("=== Deploy %s output ===\n%s\n=== End output ===\n", serviceName, output) + if err != nil { + return fmt.Errorf("deployment failed: %w\nOutput: %s", err, output) + } + return nil + } +} + +// validateConfig checks that all required config is set for a service +func (m *Manager) validateConfig(instanceName, serviceName string) error { + manifest, err := m.GetManifest(serviceName) + if err != nil { + return err // Service has no manifest + } + + // Load instance config + instanceDir := filepath.Join(m.dataDir, "instances", instanceName) + configFile := filepath.Join(instanceDir, "config.yaml") + + configData, err := os.ReadFile(configFile) + if err != nil { + return fmt.Errorf("failed to read config: %w", err) + } + + var config map[string]interface{} + if err := yaml.Unmarshal(configData, &config); err != nil { + return fmt.Errorf("failed to parse config: %w", err) + } + + // Check all required paths exist + missing := []string{} + allPaths := append(manifest.ConfigReferences, manifest.GetRequiredConfig()...) 
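// Validation walks dot-separated paths against the parsed YAML, so a
// reference like "cloud.domain" requires config.yaml to contain
// (illustrative values):
//
//	cloud:
//	  domain: example.com
//
// getNestedValue below cannot distinguish an absent key from one that is
// explicitly null; both count as missing, which is what we want for
// prompting the user.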
+ + for _, path := range allPaths { + if getNestedValue(config, path) == nil { + missing = append(missing, path) + } + } + + if len(missing) > 0 { + return fmt.Errorf("missing required configuration: %v", missing) + } + + return nil +} + +// getNestedValue retrieves a value from nested map using dot notation +func getNestedValue(data map[string]interface{}, path string) interface{} { + keys := strings.Split(path, ".") + current := data + + for i, key := range keys { + if i == len(keys)-1 { + return current[key] + } + + if next, ok := current[key].(map[string]interface{}); ok { + current = next + } else { + return nil + } + } + + return nil +} diff --git a/internal/storage/storage.go b/internal/storage/storage.go new file mode 100644 index 0000000..10bb682 --- /dev/null +++ b/internal/storage/storage.go @@ -0,0 +1,110 @@ +package storage + +import ( + "fmt" + "os" + "path/filepath" + "syscall" +) + +// EnsureDir creates a directory with specified permissions if it doesn't exist +func EnsureDir(path string, perm os.FileMode) error { + if err := os.MkdirAll(path, perm); err != nil { + return fmt.Errorf("creating directory %s: %w", path, err) + } + return nil +} + +// FileExists checks if a file exists +func FileExists(path string) bool { + _, err := os.Stat(path) + return err == nil +} + +// WriteFile writes content to a file with specified permissions +func WriteFile(path string, content []byte, perm os.FileMode) error { + if err := os.WriteFile(path, content, perm); err != nil { + return fmt.Errorf("writing file %s: %w", path, err) + } + return nil +} + +// ReadFile reads content from a file +func ReadFile(path string) ([]byte, error) { + content, err := os.ReadFile(path) + if err != nil { + return nil, fmt.Errorf("reading file %s: %w", path, err) + } + return content, nil +} + +// Lock represents a file lock +type Lock struct { + file *os.File + path string +} + +// AcquireLock acquires an exclusive lock on a file +func AcquireLock(lockPath string) (*Lock, error) { + // Ensure lock directory exists + if err := EnsureDir(filepath.Dir(lockPath), 0755); err != nil { + return nil, err + } + + // Open or create lock file + file, err := os.OpenFile(lockPath, os.O_CREATE|os.O_RDWR, 0644) + if err != nil { + return nil, fmt.Errorf("opening lock file %s: %w", lockPath, err) + } + + // Acquire exclusive lock with flock + if err := syscall.Flock(int(file.Fd()), syscall.LOCK_EX); err != nil { + file.Close() + return nil, fmt.Errorf("acquiring lock on %s: %w", lockPath, err) + } + + return &Lock{ + file: file, + path: lockPath, + }, nil +} + +// Release releases the file lock +func (l *Lock) Release() error { + if l.file == nil { + return nil + } + + // Release flock + if err := syscall.Flock(int(l.file.Fd()), syscall.LOCK_UN); err != nil { + l.file.Close() + return fmt.Errorf("releasing lock on %s: %w", l.path, err) + } + + // Close file + if err := l.file.Close(); err != nil { + return fmt.Errorf("closing lock file %s: %w", l.path, err) + } + + l.file = nil + return nil +} + +// WithLock executes a function while holding a lock +func WithLock(lockPath string, fn func() error) error { + lock, err := AcquireLock(lockPath) + if err != nil { + return err + } + defer lock.Release() + + return fn() +} + +// EnsureFilePermissions ensures a file has the correct permissions +func EnsureFilePermissions(path string, perm os.FileMode) error { + if err := os.Chmod(path, perm); err != nil { + return fmt.Errorf("setting permissions on %s: %w", path, err) + } + return nil +} diff --git 
a/internal/storage/storage_test.go b/internal/storage/storage_test.go new file mode 100644 index 0000000..a266ee1 --- /dev/null +++ b/internal/storage/storage_test.go @@ -0,0 +1,107 @@ +package storage + +import ( + "os" + "path/filepath" + "testing" +) + +func TestEnsureDir(t *testing.T) { + tmpDir := t.TempDir() + testDir := filepath.Join(tmpDir, "test", "nested", "dir") + + err := EnsureDir(testDir, 0755) + if err != nil { + t.Fatalf("EnsureDir failed: %v", err) + } + + // Verify directory exists + info, err := os.Stat(testDir) + if err != nil { + t.Fatalf("Directory not created: %v", err) + } + if !info.IsDir() { + t.Fatalf("Path is not a directory") + } + + // Calling again should be idempotent + err = EnsureDir(testDir, 0755) + if err != nil { + t.Fatalf("EnsureDir not idempotent: %v", err) + } +} + +func TestWriteFile(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "test.txt") + testData := []byte("test content") + + // Write file + err := WriteFile(testFile, testData, 0644) + if err != nil { + t.Fatalf("WriteFile failed: %v", err) + } + + // Read file back + data, err := os.ReadFile(testFile) + if err != nil { + t.Fatalf("ReadFile failed: %v", err) + } + + if string(data) != string(testData) { + t.Fatalf("Data mismatch: got %q, want %q", string(data), string(testData)) + } +} + +func TestFileExists(t *testing.T) { + tmpDir := t.TempDir() + testFile := filepath.Join(tmpDir, "test.txt") + + // File should not exist initially + if FileExists(testFile) { + t.Fatalf("File should not exist") + } + + // Create file + err := WriteFile(testFile, []byte("test"), 0644) + if err != nil { + t.Fatalf("WriteFile failed: %v", err) + } + + // File should exist now + if !FileExists(testFile) { + t.Fatalf("File should exist") + } +} + +func TestWithLock(t *testing.T) { + tmpDir := t.TempDir() + lockFile := filepath.Join(tmpDir, "test.lock") + counter := 0 + + // Execute with lock + err := WithLock(lockFile, func() error { + counter++ + return nil + }) + if err != nil { + t.Fatalf("WithLock failed: %v", err) + } + + if counter != 1 { + t.Fatalf("Function not executed: counter=%d", counter) + } + + // Should be idempotent - can acquire lock multiple times sequentially + err = WithLock(lockFile, func() error { + counter++ + return nil + }) + if err != nil { + t.Fatalf("WithLock failed on second call: %v", err) + } + + if counter != 2 { + t.Fatalf("Function not executed on second call: counter=%d", counter) + } +} diff --git a/internal/tools/context.go b/internal/tools/context.go new file mode 100644 index 0000000..293e52a --- /dev/null +++ b/internal/tools/context.go @@ -0,0 +1,37 @@ +package tools + +import ( + "os" + "os/exec" + "path/filepath" +) + +// WithTalosconfig sets the TALOSCONFIG environment variable for a command +// This allows talosctl commands to use the correct context without global state +func WithTalosconfig(cmd *exec.Cmd, talosconfigPath string) *exec.Cmd { + if cmd.Env == nil { + cmd.Env = os.Environ() + } + cmd.Env = append(cmd.Env, "TALOSCONFIG="+talosconfigPath) + return cmd +} + +// WithKubeconfig sets the KUBECONFIG environment variable for a command +// This allows kubectl commands to use the correct context without global state +func WithKubeconfig(cmd *exec.Cmd, kubeconfigPath string) *exec.Cmd { + if cmd.Env == nil { + cmd.Env = os.Environ() + } + cmd.Env = append(cmd.Env, "KUBECONFIG="+kubeconfigPath) + return cmd +} + +// GetTalosconfigPath returns the path to the talosconfig for an instance +func GetTalosconfigPath(dataDir, instanceName 
string) string { + return filepath.Join(dataDir, "instances", instanceName, "talos", "generated", "talosconfig") +} + +// GetKubeconfigPath returns the path to the kubeconfig for an instance +func GetKubeconfigPath(dataDir, instanceName string) string { + return filepath.Join(dataDir, "instances", instanceName, "kubeconfig") +} diff --git a/internal/tools/gomplate.go b/internal/tools/gomplate.go new file mode 100644 index 0000000..238ae5b --- /dev/null +++ b/internal/tools/gomplate.go @@ -0,0 +1,111 @@ +package tools + +import ( + "bytes" + "fmt" + "os/exec" + "strings" +) + +// Gomplate provides a wrapper around the gomplate command-line tool +type Gomplate struct { + gomplatePath string +} + +// NewGomplate creates a new Gomplate wrapper +func NewGomplate() *Gomplate { + // Find gomplate in PATH + path, err := exec.LookPath("gomplate") + if err != nil { + // Default to "gomplate" and let exec handle the error + path = "gomplate" + } + return &Gomplate{gomplatePath: path} +} + +// Render renders a template file with the given data sources +func (g *Gomplate) Render(templatePath, outputPath string, dataSources map[string]string) error { + args := []string{ + "-f", templatePath, + "-o", outputPath, + } + + // Add data sources + for name, path := range dataSources { + args = append(args, "-d", fmt.Sprintf("%s=%s", name, path)) + } + + cmd := exec.Command(g.gomplatePath, args...) + + var stderr bytes.Buffer + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + return fmt.Errorf("gomplate render failed: %w, stderr: %s", err, stderr.String()) + } + + return nil +} + +// RenderString renders a template string with the given data sources +func (g *Gomplate) RenderString(template string, dataSources map[string]string) (string, error) { + args := []string{ + "-i", template, + } + + // Add data sources + for name, path := range dataSources { + args = append(args, "-d", fmt.Sprintf("%s=%s", name, path)) + } + + cmd := exec.Command(g.gomplatePath, args...) + + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + return "", fmt.Errorf("gomplate render string failed: %w, stderr: %s", err, stderr.String()) + } + + return strings.TrimSpace(stdout.String()), nil +} + +// RenderWithContext renders a template with context values passed as arguments +func (g *Gomplate) RenderWithContext(templatePath, outputPath string, context map[string]string) error { + args := []string{ + "-f", templatePath, + "-o", outputPath, + } + + // Add context values + for key, value := range context { + args = append(args, "-c", fmt.Sprintf("%s=%s", key, value)) + } + + cmd := exec.Command(g.gomplatePath, args...) + + var stderr bytes.Buffer + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + return fmt.Errorf("gomplate render with context failed: %w, stderr: %s", err, stderr.String()) + } + + return nil +} + +// Exec executes gomplate with arbitrary arguments +func (g *Gomplate) Exec(args ...string) (string, error) { + cmd := exec.Command(g.gomplatePath, args...) 
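// Usage sketch for the wrappers above (file paths illustrative):
//
//	g := NewGomplate()
//	out, err := g.RenderString(`{{ (ds "config").cloud.domain }}`,
//		map[string]string{"config": "config.yaml"})
//
// Data sources map to gomplate's -d name=path flags and are read in templates
// via (ds "name"); RenderWithContext's -c flags instead mount a file at a
// context path, e.g. -c .=config.yaml makes {{ .cloud.domain }} resolve.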
+ + var stdout, stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + + if err := cmd.Run(); err != nil { + return "", fmt.Errorf("gomplate exec failed: %w, stderr: %s", err, stderr.String()) + } + + return strings.TrimSpace(stdout.String()), nil +} diff --git a/internal/tools/kubectl.go b/internal/tools/kubectl.go new file mode 100644 index 0000000..b53b1a1 --- /dev/null +++ b/internal/tools/kubectl.go @@ -0,0 +1,33 @@ +package tools + +import ( + "os/exec" +) + +// Kubectl provides a thin wrapper around the kubectl command-line tool +type Kubectl struct { + kubeconfigPath string +} + +// NewKubectl creates a new Kubectl wrapper +func NewKubectl(kubeconfigPath string) *Kubectl { + return &Kubectl{ + kubeconfigPath: kubeconfigPath, + } +} + +// DeploymentExists checks if a deployment exists in the specified namespace +func (k *Kubectl) DeploymentExists(name, namespace string) bool { + args := []string{ + "get", "deployment", name, + "-n", namespace, + } + + if k.kubeconfigPath != "" { + args = append([]string{"--kubeconfig", k.kubeconfigPath}, args...) + } + + cmd := exec.Command("kubectl", args...) + err := cmd.Run() + return err == nil +} diff --git a/internal/tools/talosctl.go b/internal/tools/talosctl.go new file mode 100644 index 0000000..1c55588 --- /dev/null +++ b/internal/tools/talosctl.go @@ -0,0 +1,362 @@ +package tools + +import ( + "encoding/json" + "fmt" + "os/exec" + "strings" +) + +// Talosctl provides a thin wrapper around the talosctl command-line tool +type Talosctl struct { + talosconfigPath string +} + +// NewTalosctl creates a new Talosctl wrapper +func NewTalosctl() *Talosctl { + return &Talosctl{} +} + +// NewTalosconfigWithConfig creates a new Talosctl wrapper with a specific talosconfig +func NewTalosconfigWithConfig(talosconfigPath string) *Talosctl { + return &Talosctl{ + talosconfigPath: talosconfigPath, + } +} + +// buildArgs adds talosconfig to args if set +func (t *Talosctl) buildArgs(baseArgs []string) []string { + if t.talosconfigPath != "" { + return append([]string{"--talosconfig", t.talosconfigPath}, baseArgs...) + } + return baseArgs +} + +// GenConfig generates Talos configuration files +func (t *Talosctl) GenConfig(clusterName, endpoint, outputDir string) error { + args := []string{ + "gen", "config", + clusterName, + endpoint, + "--output-dir", outputDir, + } + + cmd := exec.Command("talosctl", args...) + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("talosctl gen config failed: %w\nOutput: %s", err, string(output)) + } + + return nil +} + +// ApplyConfig applies configuration to a node +func (t *Talosctl) ApplyConfig(nodeIP, configFile string, insecure bool, talosconfigPath string) error { + args := []string{ + "apply-config", + "--nodes", nodeIP, + "--file", configFile, + } + + if insecure { + args = append(args, "--insecure") + } + + cmd := exec.Command("talosctl", args...) 
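// Equivalent CLI invocation (node IP and file name illustrative):
//
//	talosctl apply-config --nodes 192.168.8.21 --file controlplane.yaml --insecure
//
// --insecure is required while a node is still in maintenance mode (no machine
// config applied yet); afterwards a talosconfig must be supplied instead.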
+ if talosconfigPath != "" { + WithTalosconfig(cmd, talosconfigPath) + } + output, err := cmd.CombinedOutput() + if err != nil { + return fmt.Errorf("talosctl apply-config failed: %w\nOutput: %s", err, string(output)) + } + + return nil +} + +// DiskInfo represents disk information including path and size +type DiskInfo struct { + Path string `json:"path"` + Size int64 `json:"size"` +} + +// GetDisks queries available disks from a node (filters to disks > 10GB) +func (t *Talosctl) GetDisks(nodeIP string, insecure bool) ([]DiskInfo, error) { + args := []string{ + "get", "disks", + "--nodes", nodeIP, + "-o", "json", + } + + if insecure { + args = append(args, "--insecure") + } + + // Use jq to slurp the NDJSON into an array (like v.PoC does with jq -s) + talosCmd := exec.Command("talosctl", args...) + jqCmd := exec.Command("jq", "-s", ".") + + // Pipe talosctl output to jq + jqCmd.Stdin, _ = talosCmd.StdoutPipe() + + if err := talosCmd.Start(); err != nil { + return nil, fmt.Errorf("failed to start talosctl: %w", err) + } + + output, err := jqCmd.CombinedOutput() + if err != nil { + return nil, fmt.Errorf("failed to process disks JSON: %w\nOutput: %s", err, string(output)) + } + + if err := talosCmd.Wait(); err != nil { + return nil, fmt.Errorf("talosctl get disks failed: %w", err) + } + + var result []map[string]interface{} + if err := json.Unmarshal(output, &result); err != nil { + return nil, fmt.Errorf("failed to parse disks JSON: %w", err) + } + + disks := []DiskInfo{} + for _, item := range result { + metadata, ok := item["metadata"].(map[string]interface{}) + if !ok { + continue + } + + id, ok := metadata["id"].(string) + if !ok { + continue + } + + spec, ok := item["spec"].(map[string]interface{}) + if !ok { + continue + } + + // Extract size - can be float64 or int + var size int64 + switch v := spec["size"].(type) { + case float64: + size = int64(v) + case int64: + size = v + case int: + size = int64(v) + default: + continue + } + + // Filter to disks > 10GB (like v.PoC does) + if size > 10000000000 { + disks = append(disks, DiskInfo{ + Path: "/dev/" + id, + Size: size, + }) + } + } + + return disks, nil +} + +// GetLinks queries network interfaces from a node +func (t *Talosctl) GetLinks(nodeIP string, insecure bool) ([]map[string]interface{}, error) { + args := []string{ + "get", "links", + "--nodes", nodeIP, + "-o", "json", + } + + if insecure { + args = append(args, "--insecure") + } + + // Use jq to slurp the NDJSON into an array (like v.PoC does with jq -s) + talosCmd := exec.Command("talosctl", args...) 
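// talosctl with -o json emits one JSON document per resource (NDJSON), so the
// output is piped through `jq -s .` to slurp the stream into a single array:
//
//	{"metadata":{...}}
//	{"metadata":{...}}
//
// becomes [{"metadata":{...}},{"metadata":{...}}], which json.Unmarshal can
// parse in one pass.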
+ jqCmd := exec.Command("jq", "-s", ".") + + // Pipe talosctl output to jq + jqCmd.Stdin, _ = talosCmd.StdoutPipe() + + if err := talosCmd.Start(); err != nil { + return nil, fmt.Errorf("failed to start talosctl: %w", err) + } + + output, err := jqCmd.CombinedOutput() + if err != nil { + return nil, fmt.Errorf("failed to process links JSON: %w\nOutput: %s", err, string(output)) + } + + if err := talosCmd.Wait(); err != nil { + return nil, fmt.Errorf("talosctl get links failed: %w", err) + } + + var result []map[string]interface{} + if err := json.Unmarshal(output, &result); err != nil { + return nil, fmt.Errorf("failed to parse links JSON: %w", err) + } + + return result, nil +} + +// GetRoutes queries routing table from a node +func (t *Talosctl) GetRoutes(nodeIP string, insecure bool) ([]map[string]interface{}, error) { + args := []string{ + "get", "routes", + "--nodes", nodeIP, + "-o", "json", + } + + if insecure { + args = append(args, "--insecure") + } + + // Use jq to slurp the NDJSON into an array (like v.PoC does with jq -s) + talosCmd := exec.Command("talosctl", args...) + jqCmd := exec.Command("jq", "-s", ".") + + // Pipe talosctl output to jq + jqCmd.Stdin, _ = talosCmd.StdoutPipe() + + if err := talosCmd.Start(); err != nil { + return nil, fmt.Errorf("failed to start talosctl: %w", err) + } + + output, err := jqCmd.CombinedOutput() + if err != nil { + return nil, fmt.Errorf("failed to process routes JSON: %w\nOutput: %s", err, string(output)) + } + + if err := talosCmd.Wait(); err != nil { + return nil, fmt.Errorf("talosctl get routes failed: %w", err) + } + + var result []map[string]interface{} + if err := json.Unmarshal(output, &result); err != nil { + return nil, fmt.Errorf("failed to parse routes JSON: %w", err) + } + + return result, nil +} + +// GetDefaultInterface finds the interface with the default route +func (t *Talosctl) GetDefaultInterface(nodeIP string, insecure bool) (string, error) { + routes, err := t.GetRoutes(nodeIP, insecure) + if err != nil { + return "", err + } + + // Find route with destination 0.0.0.0/0 (default route) + for _, route := range routes { + if spec, ok := route["spec"].(map[string]interface{}); ok { + destination, _ := spec["destination"].(string) + gateway, _ := spec["gateway"].(string) + if destination == "0.0.0.0/0" && gateway != "" { + if outLink, ok := spec["outLinkName"].(string); ok { + return outLink, nil + } + } + } + } + + return "", fmt.Errorf("no default route found") +} + +// GetPhysicalInterface finds the first physical ethernet interface +func (t *Talosctl) GetPhysicalInterface(nodeIP string, insecure bool) (string, error) { + links, err := t.GetLinks(nodeIP, insecure) + if err != nil { + return "", err + } + + // Look for physical ethernet interfaces (eth*, en*, eno*, ens*, enp*) + for _, link := range links { + metadata, ok := link["metadata"].(map[string]interface{}) + if !ok { + continue + } + + id, ok := metadata["id"].(string) + if !ok || id == "lo" { + continue + } + + spec, ok := link["spec"].(map[string]interface{}) + if !ok { + continue + } + + // Check if it's ethernet and up + linkType, _ := spec["type"].(string) + operState, _ := spec["operationalState"].(string) + + if linkType == "ether" && operState == "up" { + // Prefer interfaces starting with eth, en + if strings.HasPrefix(id, "eth") || strings.HasPrefix(id, "en") { + // Skip virtual interfaces (cni, flannel, docker, br-, veth) + if !strings.Contains(id, "cni") && + !strings.Contains(id, "flannel") && + !strings.Contains(id, "docker") && + 
!strings.HasPrefix(id, "br-") &&
+ !strings.HasPrefix(id, "veth") {
+ return id, nil
+ }
+ }
+ }
+ }
+
+ return "", fmt.Errorf("no physical ethernet interface found")
+}
+
+// GetVersion gets Talos version from a node
+func (t *Talosctl) GetVersion(nodeIP string, insecure bool) (string, error) {
+ args := t.buildArgs([]string{
+ "version",
+ "--nodes", nodeIP,
+ "--short",
+ })
+
+ if insecure {
+ args = append(args, "--insecure")
+ }
+
+ cmd := exec.Command("talosctl", args...)
+ output, err := cmd.CombinedOutput()
+ if err != nil {
+ return "", fmt.Errorf("talosctl version failed: %w\nOutput: %s", err, string(output))
+ }
+
+ // Parse output to extract the server version
+ // Output format:
+ // Client:
+ // Talos v1.11.2
+ // Server:
+ // NODE: ...
+ // Tag: v1.11.0
+ lines := strings.Split(string(output), "\n")
+ for i, line := range lines {
+ if strings.Contains(line, "Tag:") {
+ // Extract version from "Tag: v1.11.0" format
+ parts := strings.Fields(line)
+ if len(parts) >= 2 {
+ return parts[len(parts)-1], nil
+ }
+ }
+ // Fall back to the client's "Talos vX.Y.Z" line near the top of the
+ // output when no server "Tag:" line is present
+ if strings.HasPrefix(strings.TrimSpace(line), "Talos v") && i < 3 {
+ return strings.TrimSpace(strings.TrimPrefix(line, "Talos ")), nil
+ }
+ }
+
+ return strings.TrimSpace(string(output)), nil
+}
+
+// Validate checks if talosctl is available
+func (t *Talosctl) Validate() error {
+ cmd := exec.Command("talosctl", "version", "--client")
+ output, err := cmd.CombinedOutput()
+ if err != nil {
+ return fmt.Errorf("talosctl not found or not working: %w\nOutput: %s", err, string(output))
+ }
+ return nil
+}
diff --git a/internal/tools/yq.go b/internal/tools/yq.go
new file mode 100644
index 0000000..c41c094
--- /dev/null
+++ b/internal/tools/yq.go
@@ -0,0 +1,133 @@
+package tools
+
+import (
+ "bytes"
+ "fmt"
+ "os"
+ "os/exec"
+ "strings"
+)
+
+// YQ provides a wrapper around the yq command-line tool
+type YQ struct {
+ yqPath string
+}
+
+// NewYQ creates a new YQ wrapper
+func NewYQ() *YQ {
+ // Find yq in PATH
+ path, err := exec.LookPath("yq")
+ if err != nil {
+ // Default to "yq" and let exec handle the error
+ path = "yq"
+ }
+ return &YQ{yqPath: path}
+}
+
+// Get retrieves a value from a YAML file using a yq expression
+func (y *YQ) Get(filePath, expression string) (string, error) {
+ cmd := exec.Command(y.yqPath, expression, filePath)
+
+ var stdout, stderr bytes.Buffer
+ cmd.Stdout = &stdout
+ cmd.Stderr = &stderr
+
+ if err := cmd.Run(); err != nil {
+ return "", fmt.Errorf("yq get failed: %w, stderr: %s", err, stderr.String())
+ }
+
+ return strings.TrimSpace(stdout.String()), nil
+}
+
+// Set sets a value in a YAML file using a yq expression
+func (y *YQ) Set(filePath, expression, value string) error {
+ // yq -i '.path = "value"' file.yaml
+ // Ensure expression starts with '.' for yq v4 syntax
+ if !strings.HasPrefix(expression, ".") {
+ expression = "." + expression
+ }
+
+ // Properly quote the value to handle special characters
+ quotedValue := fmt.Sprintf(`"%s"`, strings.ReplaceAll(value, `"`, `\"`))
+ setExpr := fmt.Sprintf("%s = %s", expression, quotedValue)
+ cmd := exec.Command(y.yqPath, "-i", setExpr, filePath)
+
+ var stderr bytes.Buffer
+ cmd.Stderr = &stderr
+
+ if err := cmd.Run(); err != nil {
+ return fmt.Errorf("yq set failed: %w, stderr: %s", err, stderr.String())
+ }
+
+ return nil
+}
+
+// Merge merges two YAML files
+func (y *YQ) Merge(file1, file2, outputFile string) error {
+ // yq eval-all '. as $item ireduce ({}; . * $item)' file1.yaml file2.yaml > output.yaml
+ cmd := exec.Command(y.yqPath, "eval-all", ". as $item ireduce ({}; . * $item)", file1, file2)
+
+ var stdout, stderr bytes.Buffer
+ cmd.Stdout = &stdout
+ cmd.Stderr = &stderr
+
+ if err := cmd.Run(); err != nil {
+ return fmt.Errorf("yq merge failed: %w, stderr: %s", err, stderr.String())
+ }
+
+ // Write the merged output directly; round-tripping it through
+ // `sh -c "echo ..."` would mangle quotes in the YAML and invite
+ // command injection
+ if err := os.WriteFile(outputFile, stdout.Bytes(), 0644); err != nil {
+ return fmt.Errorf("failed to write merged output: %w", err)
+ }
+
+ return nil
+}
+
+// Delete removes a key from a YAML file
+func (y *YQ) Delete(filePath, expression string) error {
+ // yq -i 'del(.path)' file.yaml
+ delExpr := fmt.Sprintf("del(%s)", expression)
+ cmd := exec.Command(y.yqPath, "-i", delExpr, filePath)
+
+ var stderr bytes.Buffer
+ cmd.Stderr = &stderr
+
+ if err := cmd.Run(); err != nil {
+ return fmt.Errorf("yq delete failed: %w, stderr: %s", err, stderr.String())
+ }
+
+ return nil
+}
+
+// Validate checks if a YAML file is valid
+func (y *YQ) Validate(filePath string) error {
+ cmd := exec.Command(y.yqPath, "eval", ".", filePath)
+
+ var stderr bytes.Buffer
+ cmd.Stderr = &stderr
+
+ if err := cmd.Run(); err != nil {
+ return fmt.Errorf("yaml validation failed: %w, stderr: %s", err, stderr.String())
+ }
+
+ return nil
+}
+
+// Exec executes an arbitrary yq expression on a file
+func (y *YQ) Exec(args ...string) ([]byte, error) {
+ cmd := exec.Command(y.yqPath, args...)
+
+ var stdout, stderr bytes.Buffer
+ cmd.Stdout = &stdout
+ cmd.Stderr = &stderr
+
+ if err := cmd.Run(); err != nil {
+ return nil, fmt.Errorf("yq exec failed: %w, stderr: %s", err, stderr.String())
+ }
+
+ return stdout.Bytes(), nil
+}
+
+// CleanYQOutput removes trailing newlines and "null" values from yq output
+func CleanYQOutput(output string) string {
+ output = strings.TrimSpace(output)
+ if output == "null" {
+ return ""
+ }
+ return output
+}
diff --git a/internal/utilities/utilities.go b/internal/utilities/utilities.go
new file mode 100644
index 0000000..cf7d6c6
--- /dev/null
+++ b/internal/utilities/utilities.go
@@ -0,0 +1,300 @@
+// Package utilities provides helper functions for cluster operations
+package utilities
+
+import (
+ "encoding/base64"
+ "encoding/json"
+ "fmt"
+ "os/exec"
+ "strings"
+)
+
+// HealthStatus represents cluster health information
+type HealthStatus struct {
+ Overall string `json:"overall"` // healthy, degraded, unhealthy
+ Components map[string]string `json:"components"` // component -> status
+ Issues []string `json:"issues"`
+}
+
+// DashboardToken represents a Kubernetes dashboard token
+type DashboardToken struct {
+ Token string `json:"token"`
+ ExpiresAt string `json:"expires_at,omitempty"`
+}
+
+// NodeIP represents a node's IP address information
+type NodeIP struct {
+ Hostname string `json:"hostname"`
+ InternalIP string `json:"internal_ip"`
+ ExternalIP string `json:"external_ip,omitempty"`
+}
+
+// GetClusterHealth checks the health of cluster components
+func GetClusterHealth(kubeconfigPath string) (*HealthStatus, error) {
+ status := &HealthStatus{
+ Overall: "healthy",
+ Components: make(map[string]string),
+ Issues: []string{},
+ }
+
+ // Check MetalLB
+ if err := checkComponent(kubeconfigPath, "MetalLB", "metallb-system", "app=metallb"); err != nil {
+ status.Components["metallb"] = "unhealthy"
+ status.Issues = append(status.Issues, fmt.Sprintf("MetalLB: %v", err))
+ status.Overall = "degraded"
+ } else {
+ status.Components["metallb"] = "healthy"
+ }
+
+ // Check Traefik
+ if err := checkComponent(kubeconfigPath, "Traefik", "traefik",
"app.kubernetes.io/name=traefik"); err != nil { + status.Components["traefik"] = "unhealthy" + status.Issues = append(status.Issues, fmt.Sprintf("Traefik: %v", err)) + status.Overall = "degraded" + } else { + status.Components["traefik"] = "healthy" + } + + // Check cert-manager + if err := checkComponent(kubeconfigPath, "cert-manager", "cert-manager", "app.kubernetes.io/instance=cert-manager"); err != nil { + status.Components["cert-manager"] = "unhealthy" + status.Issues = append(status.Issues, fmt.Sprintf("cert-manager: %v", err)) + status.Overall = "degraded" + } else { + status.Components["cert-manager"] = "healthy" + } + + // Check Longhorn + if err := checkComponent(kubeconfigPath, "Longhorn", "longhorn-system", "app=longhorn-manager"); err != nil { + status.Components["longhorn"] = "unhealthy" + status.Issues = append(status.Issues, fmt.Sprintf("Longhorn: %v", err)) + status.Overall = "degraded" + } else { + status.Components["longhorn"] = "healthy" + } + + if len(status.Issues) > 3 { + status.Overall = "unhealthy" + } + + return status, nil +} + +// checkComponent checks if a component is running +func checkComponent(kubeconfigPath, name, namespace, selector string) error { + args := []string{"get", "pods", "-n", namespace, "-l", selector, "-o", "json"} + if kubeconfigPath != "" { + args = append([]string{"--kubeconfig", kubeconfigPath}, args...) + } + + cmd := exec.Command("kubectl", args...) + output, err := cmd.Output() + if err != nil { + return fmt.Errorf("failed to get pods: %w", err) + } + + var result struct { + Items []struct { + Status struct { + Phase string `json:"phase"` + ContainerStatuses []struct { + Ready bool `json:"ready"` + } `json:"containerStatuses"` + } `json:"status"` + } `json:"items"` + } + + if err := json.Unmarshal(output, &result); err != nil { + return fmt.Errorf("failed to parse output: %w", err) + } + + if len(result.Items) == 0 { + return fmt.Errorf("no pods found") + } + + for _, pod := range result.Items { + if pod.Status.Phase != "Running" { + return fmt.Errorf("pod not running (phase: %s)", pod.Status.Phase) + } + for _, container := range pod.Status.ContainerStatuses { + if !container.Ready { + return fmt.Errorf("container not ready") + } + } + } + + return nil +} + +// GetDashboardToken retrieves or creates a Kubernetes dashboard token +func GetDashboardToken() (*DashboardToken, error) { + // Check if service account exists + cmd := exec.Command("kubectl", "get", "serviceaccount", "-n", "kubernetes-dashboard", "dashboard-admin") + if err := cmd.Run(); err != nil { + return nil, fmt.Errorf("dashboard-admin service account not found") + } + + // Create token + cmd = exec.Command("kubectl", "-n", "kubernetes-dashboard", "create", "token", "dashboard-admin") + output, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("failed to create token: %w", err) + } + + token := strings.TrimSpace(string(output)) + return &DashboardToken{ + Token: token, + }, nil +} + +// GetDashboardTokenFromSecret retrieves dashboard token from secret (fallback method) +func GetDashboardTokenFromSecret() (*DashboardToken, error) { + cmd := exec.Command("kubectl", "-n", "kubernetes-dashboard", "get", "secret", + "dashboard-admin-token", "-o", "jsonpath={.data.token}") + output, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("failed to get token secret: %w", err) + } + + decoded, err := base64.StdEncoding.DecodeString(string(output)) + if err != nil { + return nil, fmt.Errorf("failed to decode token: %w", err) + } + + return &DashboardToken{ + 
Token: string(decoded), + }, nil +} + +// GetNodeIPs returns IP addresses for all cluster nodes +func GetNodeIPs() ([]*NodeIP, error) { + cmd := exec.Command("kubectl", "get", "nodes", "-o", "json") + output, err := cmd.Output() + if err != nil { + return nil, fmt.Errorf("failed to get nodes: %w", err) + } + + var result struct { + Items []struct { + Metadata struct { + Name string `json:"name"` + } `json:"metadata"` + Status struct { + Addresses []struct { + Type string `json:"type"` + Address string `json:"address"` + } `json:"addresses"` + } `json:"status"` + } `json:"items"` + } + + if err := json.Unmarshal(output, &result); err != nil { + return nil, fmt.Errorf("failed to parse output: %w", err) + } + + var nodes []*NodeIP + for _, item := range result.Items { + node := &NodeIP{ + Hostname: item.Metadata.Name, + } + for _, addr := range item.Status.Addresses { + switch addr.Type { + case "InternalIP": + node.InternalIP = addr.Address + case "ExternalIP": + node.ExternalIP = addr.Address + } + } + nodes = append(nodes, node) + } + + return nodes, nil +} + +// GetControlPlaneIP returns the IP of the first control plane node +func GetControlPlaneIP() (string, error) { + cmd := exec.Command("kubectl", "get", "nodes", "-l", "node-role.kubernetes.io/control-plane", + "-o", "jsonpath={.items[0].status.addresses[?(@.type==\"InternalIP\")].address}") + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("failed to get control plane IP: %w", err) + } + + ip := strings.TrimSpace(string(output)) + if ip == "" { + return "", fmt.Errorf("no control plane IP found") + } + + return ip, nil +} + +// CopySecretBetweenNamespaces copies a secret from one namespace to another +func CopySecretBetweenNamespaces(secretName, srcNamespace, dstNamespace string) error { + // Get secret from source namespace + cmd := exec.Command("kubectl", "get", "secret", "-n", srcNamespace, secretName, "-o", "json") + output, err := cmd.Output() + if err != nil { + return fmt.Errorf("failed to get secret from %s: %w", srcNamespace, err) + } + + // Parse and modify secret + var secret map[string]interface{} + if err := json.Unmarshal(output, &secret); err != nil { + return fmt.Errorf("failed to parse secret: %w", err) + } + + // Remove fields that shouldn't be copied + if metadata, ok := secret["metadata"].(map[string]interface{}); ok { + delete(metadata, "resourceVersion") + delete(metadata, "uid") + delete(metadata, "creationTimestamp") + metadata["namespace"] = dstNamespace + } + + // Convert back to JSON + secretJSON, err := json.Marshal(secret) + if err != nil { + return fmt.Errorf("failed to marshal secret: %w", err) + } + + // Apply to destination namespace + cmd = exec.Command("kubectl", "apply", "-f", "-") + cmd.Stdin = strings.NewReader(string(secretJSON)) + if output, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("failed to apply secret to %s: %w\nOutput: %s", dstNamespace, err, string(output)) + } + + return nil +} + +// GetClusterVersion returns the Kubernetes cluster version +func GetClusterVersion() (string, error) { + cmd := exec.Command("kubectl", "version", "-o", "json") + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("failed to get cluster version: %w", err) + } + + var result struct { + ServerVersion struct { + GitVersion string `json:"gitVersion"` + } `json:"serverVersion"` + } + + if err := json.Unmarshal(output, &result); err != nil { + return "", fmt.Errorf("failed to parse version: %w", err) + } + + return result.ServerVersion.GitVersion, nil +} 
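+
+// Example (an illustrative sketch, not part of this package's API): the
+// helpers above compose naturally, e.g. to log a one-line cluster summary:
+//
+//	health, err := utilities.GetClusterHealth("")
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	version, err := utilities.GetClusterVersion()
+//	if err != nil {
+//		log.Fatal(err)
+//	}
+//	log.Printf("cluster %s is %s", version, health.Overall)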
+ +// GetTalosVersion returns the Talos version for nodes +func GetTalosVersion() (string, error) { + cmd := exec.Command("talosctl", "version", "--short") + output, err := cmd.Output() + if err != nil { + return "", fmt.Errorf("failed to get Talos version: %w", err) + } + + return strings.TrimSpace(string(output)), nil +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..1ea0604 --- /dev/null +++ b/main.go @@ -0,0 +1,69 @@ +package main + +import ( + "fmt" + "log" + "net/http" + "os" + "time" + + "github.com/gorilla/mux" + + v1 "github.com/wild-cloud/wild-central/daemon/internal/api/v1" +) + +var startTime time.Time + +func main() { + // Record start time + startTime = time.Now() + + // Get data directory from environment or use default + dataDir := os.Getenv("WILD_CENTRAL_DATA") + if dataDir == "" { + dataDir = "/var/lib/wild-central" + } + + // Get directory path from environment (required) + directoryPath := os.Getenv("WILD_DIRECTORY") + if directoryPath == "" { + log.Fatal("WILD_DIRECTORY environment variable is required") + } + + // Create API handler with all dependencies + api, err := v1.NewAPI(dataDir, directoryPath) + if err != nil { + log.Fatalf("Failed to initialize API: %v", err) + } + + // Set up HTTP router + router := mux.NewRouter() + + // Register Phase 1 API routes + api.RegisterRoutes(router) + + // Health check endpoint + router.HandleFunc("/api/v1/health", func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusOK) + fmt.Fprintf(w, `{"status":"ok"}`) + }).Methods("GET") + + // Status endpoint + router.HandleFunc("/api/v1/status", func(w http.ResponseWriter, r *http.Request) { + api.StatusHandler(w, r, startTime, dataDir, directoryPath) + }).Methods("GET") + + // Default server settings + host := "0.0.0.0" + port := 5055 + + addr := fmt.Sprintf("%s:%d", host, port) + log.Printf("Starting wild-central daemon on %s", addr) + log.Printf("Data directory: %s", dataDir) + log.Printf("Wild Cloud Directory: %s", directoryPath) + + if err := http.ListenAndServe(addr, router); err != nil { + log.Fatal("Server failed to start:", err) + } +}
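+
+// Note (an illustrative sketch, not part of this patch): http.ListenAndServe
+// installs no timeouts; if hardening were desired, the final call could use
+// an explicit http.Server instead:
+//
+//	srv := &http.Server{
+//		Addr:              addr,
+//		Handler:           router,
+//		ReadHeaderTimeout: 5 * time.Second,
+//	}
+//	if err := srv.ListenAndServe(); err != nil {
+//		log.Fatal("Server failed to start:", err)
+//	}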